diff --git a/.github/workflows/unit_test.yaml b/.github/workflows/unit_test.yaml
index 512ecd7fa4..be8581747d 100644
--- a/.github/workflows/unit_test.yaml
+++ b/.github/workflows/unit_test.yaml
@@ -41,7 +41,7 @@ jobs:
else
while IFS= read -r file; do
case "$file" in
- xtuner/v1/rl/*)
+ tests/rl/*|xtuner/v1/rl/*)
;;
*)
only_rl=false
diff --git a/tests/rl/test_qwen35_vl_moe_async_train_2step.py b/tests/rl/test_qwen35_vl_moe_async_train_2step.py
index 951bca6396..ae6a1ae0de 100644
--- a/tests/rl/test_qwen35_vl_moe_async_train_2step.py
+++ b/tests/rl/test_qwen35_vl_moe_async_train_2step.py
@@ -20,6 +20,7 @@
import json
import math
import os
+import tempfile
import time
import unittest
from pathlib import Path
@@ -68,21 +69,8 @@
MAX_PROMPT_LENGTH = 4096
MAX_RESPONSE_LENGTH = 2048
PACK_MAX_LENGTH = 8192
-MISMATCH_KL_MAX = float(
- os.environ.get(
- "XTUNER_TRAIN_2STEP_MISMATCH_KL_MAX",
- os.environ.get("XTUNER_PR_REAL_SMOKE_MISMATCH_KL_MAX", "0.005"),
- )
-)
-MISMATCH_K3_KL_MAX = float(
- os.environ.get(
- "XTUNER_TRAIN_2STEP_MISMATCH_K3_KL_MAX",
- os.environ.get("XTUNER_PR_REAL_SMOKE_MISMATCH_K3_KL_MAX", "0.005"),
- )
-)
-RUN_ROOT = Path(
- os.environ.get("XTUNER_TRAIN_2STEP_RUN_ROOT", os.environ.get("XTUNER_PR_REAL_SMOKE_RUN_ROOT", "."))
-).resolve()
+MISMATCH_KL_MAX = 0.005
+MISMATCH_K3_KL_MAX = 0.005
REQUIRED_STEP_METRICS = (
"mismatch/mismatch_kl",
@@ -111,9 +99,12 @@ def setUp(self):
if not DATA_PATH.exists():
raise FileNotFoundError(f"Long-tail training dataset does not exist: {DATA_PATH}")
- self.temp_dir = RUN_ROOT / f"{EXPERIMENT_NAME}_{time.strftime('%Y%m%d%H%M%S')}_{os.getpid()}"
- self.temp_dir.mkdir(parents=True, exist_ok=False)
- print(f"qwen35 vl moe async train 2-step work dir: {self.temp_dir}")
+ self.temp_dir = tempfile.TemporaryDirectory(
+ prefix=f"{EXPERIMENT_NAME}_{time.strftime('%Y%m%d%H%M%S')}_{os.getpid()}_",
+ )
+ self.addCleanup(self.temp_dir.cleanup)
+ self.temp_dir_path = Path(self.temp_dir.name)
+ print(f"qwen35 vl moe async train 2-step temp dir: {self.temp_dir_path}")
self.produce_calls: list[dict[str, Any]] = []
self.produce_results: list[ProduceBatchResult] = []
self.update_weight_calls = 0
@@ -137,7 +128,7 @@ def tearDown(self):
self._restore_env()
def test_qwen35_vl_moe_async_train_2step_and_metrics(self):
- work_dir = Path(self.temp_dir) / "work_dir"
+ work_dir = self.temp_dir_path / "work_dir"
work_dir.mkdir(parents=True, exist_ok=True)
start_s = time.perf_counter()
diff --git a/tests/rl/test_rollout_logic.py b/tests/rl/test_rollout_logic.py
index dd634cd478..cb3f73a47b 100644
--- a/tests/rl/test_rollout_logic.py
+++ b/tests/rl/test_rollout_logic.py
@@ -139,8 +139,6 @@ def _build_controller(self, router):
controller.config = SimpleNamespace(rollout_timeout=1.0, random_seed=0)
controller.timeout_multiplier = 1.0
controller.router = router
- controller._tool_call_parser = None
- controller._reasoning_parser = None
controller.logger = MagicMock()
return controller
diff --git a/xtuner/v1/rl/rollout/controller.py b/xtuner/v1/rl/rollout/controller.py
index b8068e00bb..ae4572334c 100644
--- a/xtuner/v1/rl/rollout/controller.py
+++ b/xtuner/v1/rl/rollout/controller.py
@@ -6,16 +6,12 @@
from ray.actor import ActorProxy
from ray.util.placement_group import PlacementGroup
-from transformers import AutoTokenizer
from xtuner.v1.data_proto.rl_data import RolloutState, Status
from xtuner.v1.rl.utils import AutoAcceleratorWorkers
from xtuner.v1.utils import XTUNER_DETERMINISTIC, get_logger
from .constants import ROLLOUT_RAY_GENERATE_MAX_CONCURRENCY
from .health_manager import ROLLOUT_RAY_GET_TIMEOUT, RolloutHealthManager
-from .parser.factory import build_reasoning_parser, build_tool_call_parser
-from .parser.reasoning_parser import ReasoningParser
-from .parser.tool_parser import ToolCallParser
from .proxy_manager import RolloutProxyManager
from .utils import SessionRouter
from .worker import (
@@ -66,7 +62,6 @@ def __init__(
worker_lifecycle_listeners=[self.proxy_manager] if self.proxy_manager is not None else None,
)
self.health_manager.start()
- self._tool_call_parser, self._reasoning_parser = self._build_output_parsers()
def get_rollout_metadata(self) -> RolloutWorkerMetadata:
"""Get information about the current rollout setup.
@@ -93,20 +88,6 @@ def validate_registered_workers_to_proxy(self) -> None:
return
self.proxy_manager.validate_registered_session_urls()
- def _build_output_parsers(self) -> tuple[ToolCallParser | None, ReasoningParser | None]:
- tool_call_parser = None
- reasoning_parser = None
-
- if self.config.tool_call_parser != "none":
- tool_call_parser = build_tool_call_parser(self.config.tool_call_parser)
-
- if self.config.reasoning_parser != "none":
- tokenizer_path = self.config.tokenizer_path or self.config.model_path
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
- reasoning_parser = build_reasoning_parser(self.config.reasoning_parser, tokenizer)
-
- return tool_call_parser, reasoning_parser
-
@ray.method(concurrency_group=ROLLOUT_CONCURRENCY_GROUP_GENERATE)
async def generate(self, rollout_state: RolloutState) -> RolloutState:
if XTUNER_DETERMINISTIC:
@@ -129,7 +110,6 @@ async def generate(self, rollout_state: RolloutState) -> RolloutState:
response_ref,
timeout=self.config.rollout_timeout * self.timeout_multiplier,
)
- self._apply_output_parsers(response_rollout_state)
return response_rollout_state
except asyncio.TimeoutError:
self.logger.error(
@@ -142,21 +122,6 @@ async def generate(self, rollout_state: RolloutState) -> RolloutState:
)
return rollout_state
- def _apply_output_parsers(self, rollout_state: RolloutState) -> None:
- """Apply tool-call and reasoning parsers to the rollout state in-
- place."""
- if self._tool_call_parser is not None:
- parsed = self._tool_call_parser.parse(rollout_state)
- rollout_state.tool_calls = parsed.tool_calls
- rollout_state.response = parsed.remaining_text or None
- if self._reasoning_parser is not None:
- parsed_reasoning = self._reasoning_parser.parse(rollout_state)
- rollout_state.response = parsed_reasoning.remaining_text
- if parsed_reasoning.reasoning_text:
- rollout_state.extra_fields["reasoning_text"] = parsed_reasoning.reasoning_text
- else:
- rollout_state.extra_fields.pop("reasoning_text", None)
-
def set_enable_partial_rollout(self, enable: bool) -> None:
"""Propagate enable_partial_rollout flag to all active workers."""
active_workers = self.registry.active_workers()
diff --git a/xtuner/v1/rl/rollout/parser/__init__.py b/xtuner/v1/rl/rollout/parser/__init__.py
deleted file mode 100644
index a7c3cdf594..0000000000
--- a/xtuner/v1/rl/rollout/parser/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from .factory import build_reasoning_parser, build_tool_call_parser
-from .qwen3_reasoning_parser import Qwen3ReasoningParser
-from .qwen3_tool_parser import Qwen3ToolCallParser
-from .qwen3p5_tool_parser import Qwen3p5ToolCallParser
-from .reasoning_parser import ParsedReasoningResult, ReasoningParser
-from .tool_parser import ParsedToolCallResult, ToolCallParser
-
-
-__all__ = [
- "ParsedReasoningResult",
- "ParsedToolCallResult",
- "Qwen3ReasoningParser",
- "Qwen3p5ToolCallParser",
- "Qwen3ToolCallParser",
- "ReasoningParser",
- "ToolCallParser",
- "build_reasoning_parser",
- "build_tool_call_parser",
-]
diff --git a/xtuner/v1/rl/rollout/parser/factory.py b/xtuner/v1/rl/rollout/parser/factory.py
deleted file mode 100644
index 86cf37e4ce..0000000000
--- a/xtuner/v1/rl/rollout/parser/factory.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-from typing import Literal
-
-from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
-
-from .qwen3_reasoning_parser import Qwen3ReasoningParser, extract_qwen3_reasoning_strip_tokens
-from .qwen3_tool_parser import Qwen3ToolCallParser
-from .qwen3p5_tool_parser import Qwen3p5ToolCallParser
-from .reasoning_parser import ReasoningParser
-from .tool_parser import ToolCallParser
-
-
-ToolCallParserName = Literal["none", "qwen3", "qwen3p5"]
-ReasoningParserName = Literal["none", "qwen3"]
-
-
-def build_tool_call_parser(parser_name: ToolCallParserName) -> ToolCallParser | None:
- if parser_name == "none":
- return None
- if parser_name == "qwen3":
- return Qwen3ToolCallParser()
- if parser_name == "qwen3p5":
- return Qwen3p5ToolCallParser()
- raise ValueError(f"Unsupported tool_call_parser: {parser_name}")
-
-
-def build_reasoning_parser(
- parser_name: ReasoningParserName,
- tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
-) -> ReasoningParser | None:
- if parser_name == "none":
- return None
- if parser_name == "qwen3":
- return Qwen3ReasoningParser(strip_tokens=extract_qwen3_reasoning_strip_tokens(tokenizer))
- raise ValueError(f"Unsupported reasoning_parser: {parser_name}")
diff --git a/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py b/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py
deleted file mode 100644
index 7ee7dad8b4..0000000000
--- a/xtuner/v1/rl/rollout/parser/qwen3_reasoning_parser.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from __future__ import annotations
-
-import re
-
-from xtuner.v1.data_proto.rl_data import RolloutState
-
-from .reasoning_parser import ParsedReasoningResult, ReasoningParser
-
-
-class Qwen3ReasoningParser(ReasoningParser):
- _reasoning_pattern = re.compile(r"<think>\s*(.*?)\s*</think>", re.DOTALL)
-
- def __init__(self, strip_tokens: list[str] | None = None):
- self._strip_tokens = strip_tokens or []
-
- def parse(self, rollout_state: RolloutState) -> ParsedReasoningResult:
- text = rollout_state.response or ""
- if not text:
- return ParsedReasoningResult()
- cleaned = text
- for token in self._strip_tokens:
- cleaned = cleaned.replace(token, "")
- reasoning_chunks = [
- match.group(1).strip() for match in self._reasoning_pattern.finditer(cleaned) if match.group(1).strip()
- ]
- content = self._reasoning_pattern.sub("", cleaned).strip()
- if not reasoning_chunks and "<think>" in cleaned:
- prefix, suffix = cleaned.split("<think>", 1)
- content = prefix.strip()
- truncated_reasoning = suffix.replace("</think>", "").strip()
- if truncated_reasoning:
- reasoning_chunks.append(truncated_reasoning)
- elif not reasoning_chunks and "</think>" in cleaned:
- reasoning_text, content = cleaned.split("</think>", 1)
- reasoning_text = reasoning_text.strip()
- if reasoning_text:
- reasoning_chunks.append(reasoning_text)
- content = content.strip()
- reasoning = "\n".join(reasoning_chunks).strip() or None
- return ParsedReasoningResult(reasoning_text=reasoning, remaining_text=content or None)
-
-
-def extract_qwen3_reasoning_strip_tokens(
- tokenizer,
-) -> list[str]:
- strip_tokens: list[str] = []
-
- eos_token = getattr(tokenizer, "eos_token", None)
- if isinstance(eos_token, str) and eos_token:
- strip_tokens.append(eos_token)
-
- for token in getattr(tokenizer, "additional_special_tokens", []) or []:
- if not isinstance(token, str):
- continue
- lowered = token.lower()
- if any(marker in lowered for marker in ("im_end", "eot", "end_of_turn", "turn_end")):
- strip_tokens.append(token)
-
- return list(dict.fromkeys(strip_tokens))
diff --git a/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py b/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py
deleted file mode 100644
index f7eab82539..0000000000
--- a/xtuner/v1/rl/rollout/parser/qwen3_tool_parser.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import Any
-from uuid import uuid4
-
-from xtuner.v1.data_proto.rl_data import RolloutToolCall
-
-from .tool_parser import (
- ParsedToolCallResult,
- ToolCallParser,
- build_rollout_tool_call,
- coerce_parameter_value,
- parse_json_or_python_mapping,
-)
-
-
-class Qwen3ToolCallParser(ToolCallParser):
- _tool_call_pattern = re.compile(r"<tool_call>\n*(.*?)</tool_call>", re.DOTALL)
- _qwen_function_pattern = re.compile(r"<function=([^>\n]+)>(.*?)</function>", re.DOTALL)
- _qwen_parameter_pattern = re.compile(r"<parameter=([^>\n]+)>(.*?)</parameter>", re.DOTALL)
- _xml_tag_pattern = re.compile(r"<([a-zA-Z_][^>\n/]*)>(.*?)</\1>", re.DOTALL)
-
- def parse_text(self, text: str) -> ParsedToolCallResult:
- if not text:
- return ParsedToolCallResult()
- cleaned_text, tool_calls = self._extract_tool_call_tags(text)
- cleaned_text, qwen_tool_calls = self._extract_qwen_function_calls(cleaned_text)
- tool_calls.extend(qwen_tool_calls)
- return ParsedToolCallResult(remaining_text=cleaned_text.strip(), tool_calls=tool_calls)
-
- def should_reject_unparsed_markup(
- self,
- *,
- has_tools: bool,
- text: str | None,
- parsed_tool_calls: list[Any] | None,
- ) -> bool:
- if not has_tools:
- return False
- if parsed_tool_calls:
- return False
- if not text:
- return False
- return any(marker in text for marker in ("", "", " tuple[str, list[RolloutToolCall]]:
- tool_calls: list[RolloutToolCall] = []
- text_parts: list[str] = []
- last_end = 0
- for match in self._tool_call_pattern.finditer(text):
- if match.start() > last_end:
- text_parts.append(text[last_end : match.start()])
- parsed_tool_call = self._parse_single_textual_tool_call(match.group(1).strip())
- if parsed_tool_call is None:
- text_parts.append(match.group(0))
- else:
- tool_calls.append(parsed_tool_call)
- last_end = match.end()
- if last_end < len(text):
- text_parts.append(text[last_end:])
- return "".join(text_parts), tool_calls
-
- def _extract_qwen_function_calls(self, text: str) -> tuple[str, list[RolloutToolCall]]:
- tool_calls: list[RolloutToolCall] = []
- text_parts: list[str] = []
- last_end = 0
- for match in self._qwen_function_pattern.finditer(text):
- if match.start() > last_end:
- text_parts.append(text[last_end : match.start()])
- parsed_tool_call = self._parse_qwen_function_call(match.group(1).strip(), match.group(2))
- if parsed_tool_call is None:
- text_parts.append(match.group(0))
- else:
- tool_calls.append(parsed_tool_call)
- last_end = match.end()
- if last_end < len(text):
- text_parts.append(text[last_end:])
- return "".join(text_parts), tool_calls
-
- def _parse_single_textual_tool_call(self, raw_payload: str) -> RolloutToolCall | None:
- payload = parse_json_or_python_mapping(raw_payload)
- if isinstance(payload, dict) and payload.get("name"):
- arguments = payload.get("arguments", payload.get("parameters", {}))
- return build_rollout_tool_call(
- name=str(payload["name"]),
- arguments=arguments,
- call_id=str(payload.get("id") or f"call_{uuid4().hex}"),
- )
- function_match = self._qwen_function_pattern.search(raw_payload)
- if function_match is None:
- return None
- return self._parse_qwen_function_call(function_match.group(1).strip(), function_match.group(2))
-
- def _parse_qwen_function_call(self, function_name: str, function_body: str) -> RolloutToolCall | None:
- arguments: dict[str, Any] = {}
- for parameter_match in self._qwen_parameter_pattern.finditer(function_body):
- param_name = parameter_match.group(1).strip()
- param_value = parameter_match.group(2).strip()
- arguments[param_name] = coerce_parameter_value(param_value)
- if not arguments:
- for tag_match in self._xml_tag_pattern.finditer(function_body):
- tag_name = tag_match.group(1).strip()
- if tag_name.startswith("function="):
- continue
- tag_value = tag_match.group(2).strip()
- if tag_name in {"path", "file_path"}:
- arguments[tag_name] = tag_value
- else:
- arguments[tag_name] = coerce_parameter_value(tag_value)
- return build_rollout_tool_call(
- name=function_name,
- arguments=arguments,
- call_id=f"call_{uuid4().hex}",
- )
diff --git a/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py b/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py
deleted file mode 100644
index eb39733058..0000000000
--- a/xtuner/v1/rl/rollout/parser/qwen3p5_tool_parser.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import Any
-from uuid import uuid4
-
-from xtuner.v1.data_proto.rl_data import RolloutToolCall
-
-from .tool_parser import ParsedToolCallResult, ToolCallParser, build_rollout_tool_call, coerce_parameter_value
-
-
-class Qwen3p5ToolCallParser(ToolCallParser):
- _tool_call_pattern = re.compile(r"<tool_call>\n*(.*?)</tool_call>", re.DOTALL)
- _parameter_pattern = re.compile(r"<parameter=([^>\n]+)>(.*?)</parameter>", re.DOTALL)
-
- def parse_text(self, text: str) -> ParsedToolCallResult:
- if not text:
- return ParsedToolCallResult()
- cleaned_text, tool_calls = self._extract_tool_call_tags(text)
- return ParsedToolCallResult(remaining_text=cleaned_text.strip(), tool_calls=tool_calls)
-
- def should_reject_unparsed_markup(
- self,
- *,
- has_tools: bool,
- text: str | None,
- parsed_tool_calls: list[Any] | None,
- ) -> bool:
- if not has_tools:
- return False
- if parsed_tool_calls:
- return False
- if not text:
- return False
- return any(marker in text for marker in ("", "", " tuple[str, list[RolloutToolCall]]:
- tool_calls: list[RolloutToolCall] = []
- text_parts: list[str] = []
- last_end = 0
-
- for match in self._tool_call_pattern.finditer(text):
- if match.start() > last_end:
- text_parts.append(text[last_end : match.start()])
- parsed_tool_call = self._parse_single_tool_call(match.group(1).strip())
- if parsed_tool_call is None:
- text_parts.append(match.group(0))
- else:
- tool_calls.append(parsed_tool_call)
- last_end = match.end()
-
- if last_end < len(text):
- text_parts.append(text[last_end:])
- return "".join(text_parts), tool_calls
-
- def _parse_single_tool_call(self, raw_payload: str) -> RolloutToolCall | None:
- function_name = self._extract_function_name(raw_payload)
- if not function_name:
- return None
-
- arguments: dict[str, Any] = {}
- for parameter_match in self._parameter_pattern.finditer(raw_payload):
- parameter_name = parameter_match.group(1).strip()
- parameter_value = parameter_match.group(2).strip()
- arguments[parameter_name] = coerce_parameter_value(parameter_value)
-
- return build_rollout_tool_call(
- name=function_name,
- arguments=arguments,
- call_id=f"call_{uuid4().hex}",
- )
-
- def _extract_function_name(self, raw_payload: str) -> str | None:
- function_start = raw_payload.find("", name_start), raw_payload.find("\n", name_start)) if index != -1
- ]
- if not terminators:
- return None
-
- function_name = raw_payload[name_start : min(terminators)].strip()
- return function_name or None
diff --git a/xtuner/v1/rl/rollout/parser/reasoning_parser.py b/xtuner/v1/rl/rollout/parser/reasoning_parser.py
deleted file mode 100644
index a6ec85b542..0000000000
--- a/xtuner/v1/rl/rollout/parser/reasoning_parser.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-from pydantic import BaseModel, ConfigDict
-
-from xtuner.v1.data_proto.rl_data import RolloutState
-
-
-class ParsedReasoningResult(BaseModel):
- model_config = ConfigDict(extra="forbid")
-
- reasoning_text: str | None = None
- remaining_text: str | None = None
-
-
-class ReasoningParser(ABC):
- @abstractmethod
- def parse(self, rollout_state: RolloutState) -> ParsedReasoningResult:
- """Return parsed reasoning and remaining text for a rollout
- response."""
diff --git a/xtuner/v1/rl/rollout/parser/tool_parser.py b/xtuner/v1/rl/rollout/parser/tool_parser.py
deleted file mode 100644
index 0bf042c5cf..0000000000
--- a/xtuner/v1/rl/rollout/parser/tool_parser.py
+++ /dev/null
@@ -1,121 +0,0 @@
-from __future__ import annotations
-
-import ast
-import json
-from abc import ABC, abstractmethod
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
-from xtuner.v1.data_proto.rl_data import RolloutFunctionCall, RolloutState, RolloutToolCall
-
-
-class ParsedToolCallResult(BaseModel):
- model_config = ConfigDict(extra="forbid")
-
- tool_calls: list[RolloutToolCall] = Field(default_factory=list)
- remaining_text: str = ""
-
-
-class ToolCallParser(ABC):
- def parse(self, rollout_state: RolloutState) -> ParsedToolCallResult:
- return self.parse_text(rollout_state.response or "")
-
- def should_reject_unparsed_markup(
- self,
- *,
- has_tools: bool,
- text: str | None,
- parsed_tool_calls: list[Any] | None,
- ) -> bool:
- """Whether the remaining assistant text should be rejected as a
- malformed tool call.
-
- Most parsers do not use textual tool-call markup, so the default behavior is to accept the text. Parsers with
- format-specific markup can override this and reject outputs that still contain unparsed tool-call fragments.
- """
- return False
-
- @abstractmethod
- def parse_text(self, text: str) -> ParsedToolCallResult:
- raise NotImplementedError
-
-
-def extract_tokenizer_token_contents(
- tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast | Any,
-) -> set[str]:
- token_contents: set[str] = set()
-
- for token in getattr(tokenizer, "additional_special_tokens", []) or []:
- if isinstance(token, str):
- token_contents.add(token)
-
- added_tokens_decoder = getattr(tokenizer, "added_tokens_decoder", None)
- if isinstance(added_tokens_decoder, dict):
- for token_info in added_tokens_decoder.values():
- if isinstance(token_info, str):
- token_contents.add(token_info)
- elif isinstance(token_info, dict):
- content = token_info.get("content")
- if isinstance(content, str):
- token_contents.add(content)
- else:
- content = getattr(token_info, "content", None)
- if isinstance(content, str):
- token_contents.add(content)
-
- get_vocab = getattr(tokenizer, "get_vocab", None)
- if callable(get_vocab):
- try:
- vocab = get_vocab()
- except Exception:
- vocab = None
- if isinstance(vocab, dict):
- token_contents.update(token for token in vocab if isinstance(token, str))
-
- return token_contents
-
-
-def parse_json_or_python_mapping(raw_payload: str) -> Any:
- try:
- return json.loads(raw_payload)
- except Exception:
- try:
- return ast.literal_eval(raw_payload)
- except Exception:
- return None
-
-
-def coerce_parameter_value(value: str) -> Any:
- stripped = value.strip()
- if not stripped:
- return ""
- try:
- return json.loads(stripped)
- except Exception:
- try:
- return ast.literal_eval(stripped)
- except Exception:
- return stripped
-
-
-def build_rollout_tool_call(
- *,
- name: str,
- arguments: Any,
- call_id: str,
-) -> RolloutToolCall:
- raw_arguments_text = arguments if isinstance(arguments, str) else None
- parsed_arguments = arguments
- if isinstance(arguments, str):
- decoded = parse_json_or_python_mapping(arguments)
- parsed_arguments = decoded if decoded is not None else {"raw": arguments}
- return RolloutToolCall(
- id=call_id,
- function=RolloutFunctionCall(
- name=name,
- arguments=parsed_arguments,
- raw_arguments_text=raw_arguments_text,
- ),
- )
diff --git a/xtuner/v1/rl/rollout/worker.py b/xtuner/v1/rl/rollout/worker.py
index 0f795d7948..c8c0ce2466 100644
--- a/xtuner/v1/rl/rollout/worker.py
+++ b/xtuner/v1/rl/rollout/worker.py
@@ -318,20 +318,6 @@ class RolloutConfig(BaseModel):
help="Context length for the rollout worker.",
),
] = None
- tool_call_parser: Annotated[
- Literal["none", "qwen3", "qwen3p5"],
- Parameter(
- group=infer_group,
- help='Structured tool-call parser to apply to rollout output. Use "none" to disable parsing, "qwen3" to enable Qwen3 tool-call parsing, or "qwen3p5" to enable Qwen3.5 coder-style tool-call parsing.',
- ),
- ] = "none"
- reasoning_parser: Annotated[
- Literal["none", "qwen3"],
- Parameter(
- group=infer_group,
- help='Reasoning parser to apply to rollout output. Use "none" to disable parsing or "qwen3" to enable Qwen3 parsing.',
- ),
- ] = "none"
enable_float8: Annotated[
bool,
Parameter(