Spaces:
Configuration error
Configuration error
File size: 4,278 Bytes
447ebeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import os
import re
import ast
from pathlib import Path
class DataReplaceVisitor(ast.NodeVisitor):
    """Record every ``<expr>.replace(<string containing "data:">, ...)`` call.

    Run :meth:`visit` on a parsed tree; each matching call is appended to
    ``self.issues`` as a dict with the keys ``file``, ``line``, ``col`` and
    ``text``. ``file`` is whatever was last passed to :meth:`set_file`.
    """

    def __init__(self):
        # Label attached to findings; stays None until set_file() is called.
        self.current_file = None
        # Findings accumulated over every tree visited by this instance.
        self.issues = []

    def set_file(self, filename):
        """Set the file name reported in subsequently recorded issues."""
        self.current_file = filename

    @staticmethod
    def _is_data_replace(call):
        """Return True if *call* is a ``.replace`` call targeting ``"data:"``.

        A call matches when it is an attribute call named ``replace``, has at
        least two positional arguments, and its first argument is a string
        literal that contains ``data:`` anywhere (substring, not equality).
        """
        target = call.func
        if not (isinstance(target, ast.Attribute) and target.attr == "replace"):
            return False
        if len(call.args) < 2:
            return False
        needle = call.args[0]
        if not isinstance(needle, ast.Constant):
            return False
        return isinstance(needle.value, str) and "data:" in needle.value

    def visit_Call(self, node):
        """Record *node* if it matches, then keep walking its children."""
        if self._is_data_replace(node):
            line_no = node.lineno
            self.issues.append(
                {
                    "file": self.current_file,
                    "line": line_no,
                    "col": node.col_offset,
                    "text": f'Found .replace("data:", ...) at line {line_no}',
                }
            )
        # Descend whether or not this call matched, so chained and nested
        # calls are inspected as well.
        self.generic_visit(node)
def check_file_with_ast(file_path):
    """Check a Python file for ``.replace("data:", ...)`` calls via its AST.

    Args:
        file_path: Path of the Python source file to inspect. It is read as
            UTF-8.

    Returns:
        A list of issue dicts (keys ``file``, ``line``, ``col``, ``text``),
        one per call recorded by ``DataReplaceVisitor``. If the file cannot
        be decoded or parsed, a single placeholder issue with ``line`` and
        ``col`` set to 0 is returned instead, so one bad file is reported
        rather than aborting the whole scan.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        try:
            tree = ast.parse(f.read(), filename=file_path)
        except (SyntaxError, ValueError):
            # SyntaxError: the source is not valid Python.
            # ValueError: covers UnicodeDecodeError (file is not valid UTF-8)
            # and the "null bytes" error older interpreters raise from
            # ast.parse.
            return [
                {
                    "file": file_path,
                    "line": 0,
                    "col": 0,
                    "text": "Syntax error in file, could not parse",
                }
            ]

    visitor = DataReplaceVisitor()
    visitor.set_file(file_path)
    visitor.visit(tree)
    return visitor.issues
def check_file_with_regex(file_path):
    """Scan a text file line by line for ``.replace("data:", ...)``.

    The pattern matches ``.replace(``, optional whitespace, then the literal
    ``data:`` wrapped in single or double quotes. The file is read as UTF-8
    and undecodable bytes are ignored.

    Returns:
        A list of issue dicts (keys ``file``, ``line``, ``col``, ``text``),
        one per match. ``line`` is 1-based and ``col`` is the 0-based offset
        of the match within its line.
    """
    pattern = re.compile(r'\.replace\(\s*[\'"]data:[\'"]')
    with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
        return [
            {
                "file": file_path,
                "line": line_no,
                "col": hit.start(),
                "text": f'Found .replace("data:", ...) at line {line_no}',
            }
            for line_no, content in enumerate(handle, 1)
            for hit in pattern.finditer(content)
        ]
def scan_directory(base_dir):
    """Recursively scan *base_dir* for ``.replace("data:", ...)`` usage.

    ``.py`` files are checked with ``check_file_with_ast``; ``.js``, ``.ts``,
    ``.jsx``, ``.tsx``, ``.md`` and ``.ipynb`` files are checked with
    ``check_file_with_regex``; all other files are ignored.

    Directories named exactly ``.git``, ``__pycache__``, ``.venv`` or
    ``node_modules`` are not descended into. Matching is by directory name,
    not by substring of the path, so ``.github/`` or a ``base_dir`` that
    itself lives under a ``.venv`` is still scanned.

    Args:
        base_dir: Root directory of the scan.

    Returns:
        A flat list of issue dicts collected from every checked file.
    """
    skipped_dirs = {".git", "__pycache__", ".venv", "node_modules"}
    regex_suffixes = (".js", ".ts", ".jsx", ".tsx", ".md", ".ipynb")

    all_issues = []
    for root, dirs, files in os.walk(base_dir):
        # Prune in place: os.walk (top-down) consults this same list to
        # decide where to descend, so skipped trees are never traversed.
        dirs[:] = [d for d in dirs if d not in skipped_dirs]
        for file in files:
            print("checking file: ", file)
            file_path = os.path.join(root, file)
            if file.endswith(".py"):
                # Python sources: AST parsing avoids false hits in comments.
                all_issues.extend(check_file_with_ast(file_path))
            elif file.endswith(regex_suffixes):
                # Other code-bearing files: fall back to a regex scan.
                all_issues.extend(check_file_with_regex(file_path))
    return all_issues
def main(base_dir="./litellm"):
    """Scan *base_dir* and fail if any ``.replace("data:", ...)`` is found.

    Args:
        base_dir: Directory to scan. The default ``./litellm`` is resolved
            against the current working directory; pass another path
            (e.g. ``../../litellm``) when running from elsewhere.

    Raises:
        FileNotFoundError: If ``base_dir`` is not an existing directory.
            Without this check a wrong working directory would scan nothing
            and report success.
        Exception: If at least one issue is found; the message gives the
            count and the recommended replacement helper.
    """
    if not os.path.isdir(base_dir):
        raise FileNotFoundError(
            f"Cannot scan {base_dir!r}: directory does not exist "
            f"(current working directory: {os.getcwd()})"
        )

    print(f"Scanning for .replace('data:', ...) usage in {base_dir}")
    issues = scan_directory(base_dir)

    if not issues:
        print("✅ No instances of .replace('data:', ...) found.")
        return

    print(f"\n⚠️ Found {len(issues)} instances of .replace('data:', ...):")
    for issue in issues:
        print(f"{issue['file']}:{issue['line']} - {issue['text']}")
    # Fail the check so CI surfaces the findings.
    raise Exception(
        f"Found {len(issues)} instances of .replace('data:', ...) which may be unsafe. Use litellm.CustomStreamWrapper._strip_sse_data_from_chunk instead."
    )
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|