# Source code for easydel.inference.tools.utils
# Copyright 2025 The EasyDeL Author @erfanzar (Erfan Zare Chavoshi).
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for tool call parsing in EasyDeL inference.

This module provides essential helper functions for parsing and processing
tool calls from Large Language Model outputs. It includes utilities for:

- Partial JSON parsing and validation
- String manipulation for streaming token differences
- Common prefix/suffix extraction for incremental parsing
- Whitespace handling in structured outputs

These utilities are fundamental to the streaming tool call extraction
process, enabling parsers to handle incomplete JSON/XML structures
as they are being generated token-by-token.

Key Functions:
    find_common_prefix: Find shared prefix between strings
    find_common_suffix: Find shared suffix between strings
    find_all_indices: Find every starting index of a substring
    extract_intermediate_diff: Extract new content for streaming
    partial_json_loads: Parse potentially incomplete JSON
    is_complete_json: Validate JSON completeness
    consume_space: Skip whitespace starting at an index
"""
import json
from json import JSONDecodeError, JSONDecoder
from typing import Any
import partial_json_parser
from partial_json_parser.core.options import Allow
def find_common_prefix(s1: str, s2: str) -> str:
    """
    Find common prefix shared between two strings.

    Used for extracting information from JSON generated by partial_json_parser
    to ensure proper token streaming without premature closure of quotes,
    brackets, or braces. Order of arguments does not matter.

    Args:
        s1: First string to compare
        s2: Second string to compare

    Returns:
        The common prefix substring, empty string if none

    Example:
        >>> find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}')
        '{"fruit": "ap'
    """
    # Count matching leading characters and slice once, instead of growing
    # the result one character at a time. zip() stops at the shorter string.
    length = 0
    for left, right in zip(s1, s2):
        if left != right:
            break
        length += 1
    return s1[:length]
def find_common_suffix(s1: str, s2: str) -> str:
    """
    Find common suffix shared between two strings.

    Scanning runs from the end of both strings towards the start and stops
    at the first position where the characters differ, where the shared
    character is alphanumeric, or where the shorter string is exhausted.
    Order of arguments does not matter.

    Args:
        s1: First string to compare
        s2: Second string to compare

    Returns:
        The common suffix substring, empty string if none

    Example:
        >>> find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}')
        '"}'
    """
    # Count matching trailing characters and slice once, instead of
    # prepending to the result on every iteration (which copies the whole
    # accumulated string each time).
    length = 0
    for left, right in zip(reversed(s1), reversed(s2)):
        if left != right or left.isalnum():
            break
        length += 1
    # Slice from an explicit start index: s1[-0:] would return all of s1.
    return s1[len(s1) - length:]
def find_all_indices(string: str, substring: str) -> list[int]:
    """
    Collect every index at which ``substring`` starts inside ``string``.

    Useful for locating multiple tool call boundaries or markers
    in model output. Each search resumes one character after the previous
    hit, so overlapping occurrences are all reported.

    Args:
        string: String to search in
        substring: Substring to find

    Returns:
        List of starting indices where substring appears, in ascending
        order; empty list when there is no occurrence
    """
    positions: list[int] = []
    cursor = string.find(substring)
    while cursor != -1:
        positions.append(cursor)
        # Resume just past the start of the last hit (not past its end).
        cursor = string.find(substring, cursor + 1)
    return positions
def partial_json_loads(input_str: str, flags: Allow) -> tuple[Any, int]:
    """
    Parse JSON that may be incomplete, reporting how much input was used.

    The string is first handed to ``partial_json_parser.loads``. If that
    raises a ``JSONDecodeError`` whose message contains "Extra data", the
    string is decoded again with the standard ``JSONDecoder.raw_decode``,
    which parses the leading JSON value and reports where it ended.

    Args:
        input_str: JSON string to parse (may be incomplete)
        flags: Parsing flags from partial_json_parser.Allow

    Returns:
        Tuple of (parsed object, characters consumed). On the primary path
        the count is ``len(input_str)``; on the fallback path it is the end
        index returned by ``raw_decode``.

    Raises:
        JSONDecodeError: If parsing fails for any reason other than
            "Extra data", or if the fallback decoder itself fails
    """
    try:
        parsed = partial_json_parser.loads(input_str, flags)
    except JSONDecodeError as err:
        # Only the "Extra data" case is recoverable; re-raise anything else.
        if "Extra data" not in err.msg:
            raise
        return JSONDecoder().raw_decode(input_str)
    return parsed, len(input_str)
def is_complete_json(input_str: str) -> bool:
    """
    Report whether a string parses as a complete JSON document.

    The check is a plain ``json.loads`` attempt; the parsed value itself
    is discarded.

    Args:
        input_str: String to validate

    Returns:
        True if ``json.loads`` accepts the string, False if it raises
        ``JSONDecodeError``
    """
    try:
        json.loads(input_str)
    except JSONDecodeError:
        return False
    else:
        return True
def consume_space(i: int, s: str) -> int:
    """
    Advance past a run of whitespace characters beginning at an index.

    Args:
        i: Starting index
        s: String to process

    Returns:
        Index of the first character at or after ``i`` for which
        ``str.isspace()`` is false, or ``len(s)`` if only whitespace
        remains. A starting index already at or beyond ``len(s)`` is
        returned unchanged.
    """
    for pos in range(i, len(s)):
        if not s[pos].isspace():
            return pos
    # Nothing but whitespace was scanned: land on len(s), unless the start
    # was already past the end, in which case it is handed back as-is.
    return max(i, len(s))