From b65f3b696957ee2ffd6304fe9fdfae775b2b4fb9 Mon Sep 17 00:00:00 2001 From: YanhuiDua Date: Tue, 23 Jun 2026 04:11:18 +0000 Subject: [PATCH 1/2] delete tool parser and reasoning parser and reset worker_dir in ut --- .../test_qwen35_vl_moe_async_train_2step.py | 29 ++--- tests/rl/test_rollout_logic.py | 2 - xtuner/v1/rl/rollout/controller.py | 35 ----- xtuner/v1/rl/rollout/parser/__init__.py | 19 --- xtuner/v1/rl/rollout/parser/factory.py | 36 ------ .../rollout/parser/qwen3_reasoning_parser.py | 59 --------- .../v1/rl/rollout/parser/qwen3_tool_parser.py | 115 ----------------- .../rl/rollout/parser/qwen3p5_tool_parser.py | 86 ------------- .../v1/rl/rollout/parser/reasoning_parser.py | 21 --- xtuner/v1/rl/rollout/parser/tool_parser.py | 121 ------------------ xtuner/v1/rl/rollout/worker.py | 14 -- 11 files changed, 10 insertions(+), 527 deletions(-) delete mode 100644 xtuner/v1/rl/rollout/parser/__init__.py delete mode 100644 xtuner/v1/rl/rollout/parser/factory.py delete mode 100644 xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py delete mode 100644 xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py delete mode 100644 xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py delete mode 100644 xtuner/v1/rl/rollout/parser/reasoning_parser.py delete mode 100644 xtuner/v1/rl/rollout/parser/tool_parser.py diff --git a/tests/rl/test_qwen35_vl_moe_async_train_2step.py b/tests/rl/test_qwen35_vl_moe_async_train_2step.py index 32afc94741..1f871288de 100644 --- a/tests/rl/test_qwen35_vl_moe_async_train_2step.py +++ b/tests/rl/test_qwen35_vl_moe_async_train_2step.py @@ -20,6 +20,7 @@ import json import math import os +import tempfile import time import unittest from pathlib import Path @@ -68,21 +69,8 @@ MAX_PROMPT_LENGTH = 4096 MAX_RESPONSE_LENGTH = 2048 PACK_MAX_LENGTH = 8192 -MISMATCH_KL_MAX = float( - os.environ.get( - "XTUNER_TRAIN_2STEP_MISMATCH_KL_MAX", - os.environ.get("XTUNER_PR_REAL_SMOKE_MISMATCH_KL_MAX", "0.005"), - ) -) -MISMATCH_K3_KL_MAX = float( - 
os.environ.get( - "XTUNER_TRAIN_2STEP_MISMATCH_K3_KL_MAX", - os.environ.get("XTUNER_PR_REAL_SMOKE_MISMATCH_K3_KL_MAX", "0.005"), - ) -) -RUN_ROOT = Path( - os.environ.get("XTUNER_TRAIN_2STEP_RUN_ROOT", os.environ.get("XTUNER_PR_REAL_SMOKE_RUN_ROOT", ".")) -).resolve() +MISMATCH_KL_MAX = 0.005 +MISMATCH_K3_KL_MAX = 0.005 REQUIRED_STEP_METRICS = ( "mismatch/mismatch_kl", @@ -111,9 +99,12 @@ def setUp(self): if not DATA_PATH.exists(): raise FileNotFoundError(f"Long-tail training dataset does not exist: {DATA_PATH}") - self.temp_dir = RUN_ROOT / f"{EXPERIMENT_NAME}_{time.strftime('%Y%m%d%H%M%S')}_{os.getpid()}" - self.temp_dir.mkdir(parents=True, exist_ok=False) - print(f"qwen35 vl moe async train 2-step work dir: {self.temp_dir}") + self.temp_dir = tempfile.TemporaryDirectory( + prefix=f"{EXPERIMENT_NAME}_{time.strftime('%Y%m%d%H%M%S')}_{os.getpid()}_", + ) + self.addCleanup(self.temp_dir.cleanup) + self.temp_dir_path = Path(self.temp_dir.name) + print(f"qwen35 vl moe async train 2-step temp dir: {self.temp_dir_path}") self.produce_calls: list[dict[str, Any]] = [] self.produce_results: list[ProduceBatchResult] = [] self.update_weight_calls = 0 @@ -137,7 +128,7 @@ def tearDown(self): self._restore_env() def test_qwen35_vl_moe_async_train_2step_and_metrics(self): - work_dir = Path(self.temp_dir) / "work_dir" + work_dir = self.temp_dir_path / "work_dir" work_dir.mkdir(parents=True, exist_ok=True) start_s = time.perf_counter() diff --git a/tests/rl/test_rollout_logic.py b/tests/rl/test_rollout_logic.py index f6f1ee2817..e2e78768b1 100644 --- a/tests/rl/test_rollout_logic.py +++ b/tests/rl/test_rollout_logic.py @@ -89,8 +89,6 @@ def _build_controller(self, router): controller.config = SimpleNamespace(rollout_timeout=1.0, random_seed=0) controller.timeout_multiplier = 1.0 controller.router = router - controller._tool_call_parser = None - controller._reasoning_parser = None controller.logger = MagicMock() return controller diff --git a/xtuner/v1/rl/rollout/controller.py 
b/xtuner/v1/rl/rollout/controller.py index c59ffb25ad..8b989c7943 100644 --- a/xtuner/v1/rl/rollout/controller.py +++ b/xtuner/v1/rl/rollout/controller.py @@ -6,15 +6,11 @@ from ray.actor import ActorProxy from ray.util.placement_group import PlacementGroup -from transformers import AutoTokenizer from xtuner.v1.data_proto.rl_data import RolloutState, Status from xtuner.v1.rl.utils import AutoAcceleratorWorkers from xtuner.v1.utils import XTUNER_DETERMINISTIC, get_logger from .health_manager import ROLLOUT_RAY_GET_TIMEOUT, RolloutHealthManager -from .parser.factory import build_reasoning_parser, build_tool_call_parser -from .parser.reasoning_parser import ReasoningParser -from .parser.tool_parser import ToolCallParser from .utils import SessionRouter from .worker import ( ROLLOUT_CONCURRENCY_GROUP_GENERATE, @@ -62,7 +58,6 @@ def __init__( registry=self.registry, ) self.health_manager.start() - self._tool_call_parser, self._reasoning_parser = self._build_output_parsers() def get_rollout_metadata(self) -> RolloutWorkerMetadata: """Get information about the current rollout setup. 
@@ -76,20 +71,6 @@ def get_rollout_metadata(self) -> RolloutWorkerMetadata: self.logger.info(f"Rollout worker session server URLs: {rollout_metadata['worker_session_url_dict']}") return rollout_metadata - def _build_output_parsers(self) -> tuple[ToolCallParser | None, ReasoningParser | None]: - tool_call_parser = None - reasoning_parser = None - - if self.config.tool_call_parser != "none": - tool_call_parser = build_tool_call_parser(self.config.tool_call_parser) - - if self.config.reasoning_parser != "none": - tokenizer_path = self.config.tokenizer_path or self.config.model_path - tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True) - reasoning_parser = build_reasoning_parser(self.config.reasoning_parser, tokenizer) - - return tool_call_parser, reasoning_parser - def get_generate_concurrency(self) -> int: return self._generate_concurrency @@ -115,7 +96,6 @@ async def generate(self, rollout_state: RolloutState) -> RolloutState: response_ref, timeout=self.config.rollout_timeout * self.timeout_multiplier, ) - self._apply_output_parsers(response_rollout_state) return response_rollout_state except asyncio.TimeoutError: self.logger.error( @@ -128,21 +108,6 @@ async def generate(self, rollout_state: RolloutState) -> RolloutState: ) return rollout_state - def _apply_output_parsers(self, rollout_state: RolloutState) -> None: - """Apply tool-call and reasoning parsers to the rollout state in- - place.""" - if self._tool_call_parser is not None: - parsed = self._tool_call_parser.parse(rollout_state) - rollout_state.tool_calls = parsed.tool_calls - rollout_state.response = parsed.remaining_text or None - if self._reasoning_parser is not None: - parsed_reasoning = self._reasoning_parser.parse(rollout_state) - rollout_state.response = parsed_reasoning.remaining_text - if parsed_reasoning.reasoning_text: - rollout_state.extra_fields["reasoning_text"] = parsed_reasoning.reasoning_text - else: - rollout_state.extra_fields.pop("reasoning_text", None) - 
def set_enable_partial_rollout(self, enable: bool) -> None: """Propagate enable_partial_rollout flag to all active workers.""" active_workers = self.registry.active_workers() diff --git a/xtuner/v1/rl/rollout/parser/__init__.py b/xtuner/v1/rl/rollout/parser/__init__.py deleted file mode 100644 index a7c3cdf594..0000000000 --- a/xtuner/v1/rl/rollout/parser/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .factory import build_reasoning_parser, build_tool_call_parser -from .qwen3_reasoning_parser import Qwen3ReasoningParser -from .qwen3_tool_parser import Qwen3ToolCallParser -from .qwen3p5_tool_parser import Qwen3p5ToolCallParser -from .reasoning_parser import ParsedReasoningResult, ReasoningParser -from .tool_parser import ParsedToolCallResult, ToolCallParser - - -__all__ = [ - "ParsedReasoningResult", - "ParsedToolCallResult", - "Qwen3ReasoningParser", - "Qwen3p5ToolCallParser", - "Qwen3ToolCallParser", - "ReasoningParser", - "ToolCallParser", - "build_reasoning_parser", - "build_tool_call_parser", -] diff --git a/xtuner/v1/rl/rollout/parser/factory.py b/xtuner/v1/rl/rollout/parser/factory.py deleted file mode 100644 index 86cf37e4ce..0000000000 --- a/xtuner/v1/rl/rollout/parser/factory.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import annotations - -from typing import Literal - -from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast - -from .qwen3_reasoning_parser import Qwen3ReasoningParser, extract_qwen3_reasoning_strip_tokens -from .qwen3_tool_parser import Qwen3ToolCallParser -from .qwen3p5_tool_parser import Qwen3p5ToolCallParser -from .reasoning_parser import ReasoningParser -from .tool_parser import ToolCallParser - - -ToolCallParserName = Literal["none", "qwen3", "qwen3p5"] -ReasoningParserName = Literal["none", "qwen3"] - - -def build_tool_call_parser(parser_name: ToolCallParserName) -> ToolCallParser | None: - if parser_name == "none": - return None - if parser_name == "qwen3": - return Qwen3ToolCallParser() - if parser_name == 
"qwen3p5": - return Qwen3p5ToolCallParser() - raise ValueError(f"Unsupported tool_call_parser: {parser_name}") - - -def build_reasoning_parser( - parser_name: ReasoningParserName, - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -) -> ReasoningParser | None: - if parser_name == "none": - return None - if parser_name == "qwen3": - return Qwen3ReasoningParser(strip_tokens=extract_qwen3_reasoning_strip_tokens(tokenizer)) - raise ValueError(f"Unsupported reasoning_parser: {parser_name}") diff --git a/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py b/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py deleted file mode 100644 index 7ee7dad8b4..0000000000 --- a/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -import re - -from xtuner.v1.data_proto.rl_data import RolloutState - -from .reasoning_parser import ParsedReasoningResult, ReasoningParser - - -class Qwen3ReasoningParser(ReasoningParser): - _reasoning_pattern = re.compile(r"\s*(.*?)\s*", re.DOTALL) - - def __init__(self, strip_tokens: list[str] | None = None): - self._strip_tokens = strip_tokens or [] - - def parse(self, rollout_state: RolloutState) -> ParsedReasoningResult: - text = rollout_state.response or "" - if not text: - return ParsedReasoningResult() - cleaned = text - for token in self._strip_tokens: - cleaned = cleaned.replace(token, "") - reasoning_chunks = [ - match.group(1).strip() for match in self._reasoning_pattern.finditer(cleaned) if match.group(1).strip() - ] - content = self._reasoning_pattern.sub("", cleaned).strip() - if not reasoning_chunks and "" in cleaned: - prefix, suffix = cleaned.split("", 1) - content = prefix.strip() - truncated_reasoning = suffix.replace("", "").strip() - if truncated_reasoning: - reasoning_chunks.append(truncated_reasoning) - elif not reasoning_chunks and "" in cleaned: - reasoning_text, content = cleaned.split("", 1) - reasoning_text = reasoning_text.strip() - if 
reasoning_text: - reasoning_chunks.append(reasoning_text) - content = content.strip() - reasoning = "\n".join(reasoning_chunks).strip() or None - return ParsedReasoningResult(reasoning_text=reasoning, remaining_text=content or None) - - -def extract_qwen3_reasoning_strip_tokens( - tokenizer, -) -> list[str]: - strip_tokens: list[str] = [] - - eos_token = getattr(tokenizer, "eos_token", None) - if isinstance(eos_token, str) and eos_token: - strip_tokens.append(eos_token) - - for token in getattr(tokenizer, "additional_special_tokens", []) or []: - if not isinstance(token, str): - continue - lowered = token.lower() - if any(marker in lowered for marker in ("im_end", "eot", "end_of_turn", "turn_end")): - strip_tokens.append(token) - - return list(dict.fromkeys(strip_tokens)) diff --git a/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py b/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py deleted file mode 100644 index f7eab82539..0000000000 --- a/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import annotations - -import re -from typing import Any -from uuid import uuid4 - -from xtuner.v1.data_proto.rl_data import RolloutToolCall - -from .tool_parser import ( - ParsedToolCallResult, - ToolCallParser, - build_rollout_tool_call, - coerce_parameter_value, - parse_json_or_python_mapping, -) - - -class Qwen3ToolCallParser(ToolCallParser): - _tool_call_pattern = re.compile(r"\n*(.*?)", re.DOTALL) - _qwen_function_pattern = re.compile(r"\n]+)>(.*?)", re.DOTALL) - _qwen_parameter_pattern = re.compile(r"\n]+)>(.*?)", re.DOTALL) - _xml_tag_pattern = re.compile(r"<([a-zA-Z_][^>\n/]*)>(.*?)", re.DOTALL) - - def parse_text(self, text: str) -> ParsedToolCallResult: - if not text: - return ParsedToolCallResult() - cleaned_text, tool_calls = self._extract_tool_call_tags(text) - cleaned_text, qwen_tool_calls = self._extract_qwen_function_calls(cleaned_text) - tool_calls.extend(qwen_tool_calls) - return 
ParsedToolCallResult(remaining_text=cleaned_text.strip(), tool_calls=tool_calls) - - def should_reject_unparsed_markup( - self, - *, - has_tools: bool, - text: str | None, - parsed_tool_calls: list[Any] | None, - ) -> bool: - if not has_tools: - return False - if parsed_tool_calls: - return False - if not text: - return False - return any(marker in text for marker in ("", "", " tuple[str, list[RolloutToolCall]]: - tool_calls: list[RolloutToolCall] = [] - text_parts: list[str] = [] - last_end = 0 - for match in self._qwen_function_pattern.finditer(text): - if match.start() > last_end: - text_parts.append(text[last_end : match.start()]) - parsed_tool_call = self._parse_qwen_function_call(match.group(1).strip(), match.group(2)) - if parsed_tool_call is None: - text_parts.append(match.group(0)) - else: - tool_calls.append(parsed_tool_call) - last_end = match.end() - if last_end < len(text): - text_parts.append(text[last_end:]) - return "".join(text_parts), tool_calls - - def _parse_single_textual_tool_call(self, raw_payload: str) -> RolloutToolCall | None: - payload = parse_json_or_python_mapping(raw_payload) - if isinstance(payload, dict) and payload.get("name"): - arguments = payload.get("arguments", payload.get("parameters", {})) - return build_rollout_tool_call( - name=str(payload["name"]), - arguments=arguments, - call_id=str(payload.get("id") or f"call_{uuid4().hex}"), - ) - function_match = self._qwen_function_pattern.search(raw_payload) - if function_match is None: - return None - return self._parse_qwen_function_call(function_match.group(1).strip(), function_match.group(2)) - - def _parse_qwen_function_call(self, function_name: str, function_body: str) -> RolloutToolCall | None: - arguments: dict[str, Any] = {} - for parameter_match in self._qwen_parameter_pattern.finditer(function_body): - param_name = parameter_match.group(1).strip() - param_value = parameter_match.group(2).strip() - arguments[param_name] = coerce_parameter_value(param_value) - if not 
arguments: - for tag_match in self._xml_tag_pattern.finditer(function_body): - tag_name = tag_match.group(1).strip() - if tag_name.startswith("function="): - continue - tag_value = tag_match.group(2).strip() - if tag_name in {"path", "file_path"}: - arguments[tag_name] = tag_value - else: - arguments[tag_name] = coerce_parameter_value(tag_value) - return build_rollout_tool_call( - name=function_name, - arguments=arguments, - call_id=f"call_{uuid4().hex}", - ) diff --git a/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py b/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py deleted file mode 100644 index eb39733058..0000000000 --- a/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import annotations - -import re -from typing import Any -from uuid import uuid4 - -from xtuner.v1.data_proto.rl_data import RolloutToolCall - -from .tool_parser import ParsedToolCallResult, ToolCallParser, build_rollout_tool_call, coerce_parameter_value - - -class Qwen3p5ToolCallParser(ToolCallParser): - _tool_call_pattern = re.compile(r"\n*(.*?)", re.DOTALL) - _parameter_pattern = re.compile(r"\n]+)>(.*?)", re.DOTALL) - - def parse_text(self, text: str) -> ParsedToolCallResult: - if not text: - return ParsedToolCallResult() - cleaned_text, tool_calls = self._extract_tool_call_tags(text) - return ParsedToolCallResult(remaining_text=cleaned_text.strip(), tool_calls=tool_calls) - - def should_reject_unparsed_markup( - self, - *, - has_tools: bool, - text: str | None, - parsed_tool_calls: list[Any] | None, - ) -> bool: - if not has_tools: - return False - if parsed_tool_calls: - return False - if not text: - return False - return any(marker in text for marker in ("", "", " RolloutToolCall | None: - function_name = self._extract_function_name(raw_payload) - if not function_name: - return None - - arguments: dict[str, Any] = {} - for parameter_match in self._parameter_pattern.finditer(raw_payload): - parameter_name = 
parameter_match.group(1).strip() - parameter_value = parameter_match.group(2).strip() - arguments[parameter_name] = coerce_parameter_value(parameter_value) - - return build_rollout_tool_call( - name=function_name, - arguments=arguments, - call_id=f"call_{uuid4().hex}", - ) - - def _extract_function_name(self, raw_payload: str) -> str | None: - function_start = raw_payload.find("", name_start), raw_payload.find("\n", name_start)) if index != -1 - ] - if not terminators: - return None - - function_name = raw_payload[name_start : min(terminators)].strip() - return function_name or None diff --git a/xtuner/v1/rl/rollout/parser/reasoning_parser.py b/xtuner/v1/rl/rollout/parser/reasoning_parser.py deleted file mode 100644 index a6ec85b542..0000000000 --- a/xtuner/v1/rl/rollout/parser/reasoning_parser.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod - -from pydantic import BaseModel, ConfigDict - -from xtuner.v1.data_proto.rl_data import RolloutState - - -class ParsedReasoningResult(BaseModel): - model_config = ConfigDict(extra="forbid") - - reasoning_text: str | None = None - remaining_text: str | None = None - - -class ReasoningParser(ABC): - @abstractmethod - def parse(self, rollout_state: RolloutState) -> ParsedReasoningResult: - """Return parsed reasoning and remaining text for a rollout - response.""" diff --git a/xtuner/v1/rl/rollout/parser/tool_parser.py b/xtuner/v1/rl/rollout/parser/tool_parser.py deleted file mode 100644 index 0bf042c5cf..0000000000 --- a/xtuner/v1/rl/rollout/parser/tool_parser.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations - -import ast -import json -from abc import ABC, abstractmethod -from typing import Any - -from pydantic import BaseModel, ConfigDict, Field - -from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast -from xtuner.v1.data_proto.rl_data import RolloutFunctionCall, RolloutState, RolloutToolCall - - -class 
ParsedToolCallResult(BaseModel): - model_config = ConfigDict(extra="forbid") - - tool_calls: list[RolloutToolCall] = Field(default_factory=list) - remaining_text: str = "" - - -class ToolCallParser(ABC): - def parse(self, rollout_state: RolloutState) -> ParsedToolCallResult: - return self.parse_text(rollout_state.response or "") - - def should_reject_unparsed_markup( - self, - *, - has_tools: bool, - text: str | None, - parsed_tool_calls: list[Any] | None, - ) -> bool: - """Whether the remaining assistant text should be rejected as a - malformed tool call. - - Most parsers do not use textual tool-call markup, so the default behavior is to accept the text. Parsers with - format-specific markup can override this and reject outputs that still contain unparsed tool-call fragments. - """ - return False - - @abstractmethod - def parse_text(self, text: str) -> ParsedToolCallResult: - raise NotImplementedError - - -def extract_tokenizer_token_contents( - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast | Any, -) -> set[str]: - token_contents: set[str] = set() - - for token in getattr(tokenizer, "additional_special_tokens", []) or []: - if isinstance(token, str): - token_contents.add(token) - - added_tokens_decoder = getattr(tokenizer, "added_tokens_decoder", None) - if isinstance(added_tokens_decoder, dict): - for token_info in added_tokens_decoder.values(): - if isinstance(token_info, str): - token_contents.add(token_info) - elif isinstance(token_info, dict): - content = token_info.get("content") - if isinstance(content, str): - token_contents.add(content) - else: - content = getattr(token_info, "content", None) - if isinstance(content, str): - token_contents.add(content) - - get_vocab = getattr(tokenizer, "get_vocab", None) - if callable(get_vocab): - try: - vocab = get_vocab() - except Exception: - vocab = None - if isinstance(vocab, dict): - token_contents.update(token for token in vocab if isinstance(token, str)) - - return token_contents - - -def 
parse_json_or_python_mapping(raw_payload: str) -> Any: - try: - return json.loads(raw_payload) - except Exception: - try: - return ast.literal_eval(raw_payload) - except Exception: - return None - - -def coerce_parameter_value(value: str) -> Any: - stripped = value.strip() - if not stripped: - return "" - try: - return json.loads(stripped) - except Exception: - try: - return ast.literal_eval(stripped) - except Exception: - return stripped - - -def build_rollout_tool_call( - *, - name: str, - arguments: Any, - call_id: str, -) -> RolloutToolCall: - raw_arguments_text = arguments if isinstance(arguments, str) else None - parsed_arguments = arguments - if isinstance(arguments, str): - decoded = parse_json_or_python_mapping(arguments) - parsed_arguments = decoded if decoded is not None else {"raw": arguments} - return RolloutToolCall( - id=call_id, - function=RolloutFunctionCall( - name=name, - arguments=parsed_arguments, - raw_arguments_text=raw_arguments_text, - ), - ) diff --git a/xtuner/v1/rl/rollout/worker.py b/xtuner/v1/rl/rollout/worker.py index fd674ac79e..a29386a16e 100644 --- a/xtuner/v1/rl/rollout/worker.py +++ b/xtuner/v1/rl/rollout/worker.py @@ -315,20 +315,6 @@ class RolloutConfig(BaseModel): help="Context length for the rollout worker.", ), ] = None - tool_call_parser: Annotated[ - Literal["none", "qwen3", "qwen3p5"], - Parameter( - group=infer_group, - help='Structured tool-call parser to apply to rollout output. Use "none" to disable parsing, "qwen3" to enable Qwen3 tool-call parsing, or "qwen3p5" to enable Qwen3.5 coder-style tool-call parsing.', - ), - ] = "none" - reasoning_parser: Annotated[ - Literal["none", "qwen3"], - Parameter( - group=infer_group, - help='Reasoning parser to apply to rollout output. 
Use "none" to disable parsing or "qwen3" to enable Qwen3 parsing.', - ), - ] = "none" enable_float8: Annotated[ bool, Parameter( From 4f9e366a03030f133385796b2634219211b9829a Mon Sep 17 00:00:00 2001 From: YanhuiDua Date: Tue, 23 Jun 2026 08:45:06 +0000 Subject: [PATCH 2/2] fix rl tests rules --- .github/workflows/unit_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit_test.yaml b/.github/workflows/unit_test.yaml index 512ecd7fa4..be8581747d 100644 --- a/.github/workflows/unit_test.yaml +++ b/.github/workflows/unit_test.yaml @@ -41,7 +41,7 @@ jobs: else while IFS= read -r file; do case "$file" in - xtuner/v1/rl/*) + tests/rl/*|xtuner/v1/rl/*) ;; *) only_rl=false