ByteMaster01 committed on
Commit
0a65f9d
·
1 Parent(s): f4a7a03

initial commit

Browse files
app.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import requests
4
+ import os
5
+ import subprocess
6
+ import wget
7
+ from loguru import logger
8
+ from data_utils.line_based_parsing import parse_line_based_query, convert_to_lines
9
+ from data_utils.base_conversion_utils import (
10
+ build_schema_maps,
11
+ convert_modified_to_actual_code_string
12
+ )
13
+ from data_utils.schema_utils import schema_to_line_based
14
+ from configs.prompt_config import SYSTEM_PROMPT_V3, MODEL_PROMPT_V3
15
+
16
+ LLAMA_SERVER_URL = "http://127.0.0.1:8080/v1/chat/completions"
17
+ MODEL_PATH = "./models/unsloth.Q8_0.gguf"
18
+
19
def download_model():
    """Fetch the GGUF model weights into MODEL_PATH unless already present."""
    os.makedirs("./models", exist_ok=True)
    # Nothing to do when the weights file is already on disk.
    if os.path.exists(MODEL_PATH):
        return
    logger.info("Downloading model weights...")
    wget.download(
        "https://huggingface.co/ByteMaster01/NL2SQL/resolve/main/unsloth.Q8_0.gguf",
        MODEL_PATH
    )
    logger.info("\nModel download complete!")
29
+
30
def start_llama_server():
    """Launch the llama.cpp OpenAI-compatible server as a background process.

    Re-raises any exception after logging it so the caller sees the failure.
    """
    server_cmd = [
        "python", "-m", "llama_cpp.server",
        "--model", MODEL_PATH,
        "--port", "8080",
    ]
    try:
        logger.info("Starting llama.cpp server...")
        subprocess.Popen(server_cmd)
        logger.info("Server started successfully!")
    except Exception as e:
        logger.error(f"Failed to start server: {e}")
        raise
43
+
44
def convert_line_parsed_to_mongo(line_parsed: str, schema: dict) -> str:
    """Turn the model's line-based output into a db.<collection>.find(...) string.

    Returns an empty string when any stage of the conversion fails.
    """
    try:
        flat_query = parse_line_based_query(line_parsed)
        target_collection = schema["collections"][0]["name"]
        field_map, _ = build_schema_maps(schema)
        return convert_modified_to_actual_code_string(flat_query, field_map, target_collection)
    except Exception as e:
        logger.error(f"Error converting line parsed to MongoDB query: {e}")
        return ""
54
+
55
def process_query(schema_text: str, nl_query: str, additional_info: str = "") -> list:
    """Convert a natural-language query into a MongoDB query via the local LLM.

    Args:
        schema_text: MongoDB schema as a JSON string.
        nl_query: The user's natural-language question.
        additional_info: Optional extra context (timestamps, etc.).

    Returns:
        A two-element list [mongo_query, line_based_output] matching the two
        Gradio output components; on failure both elements carry the error text.
    """
    try:
        # Parse schema from string to dict
        schema = json.loads(schema_text)

        # Convert schema to line-based format
        line_based_schema = schema_to_line_based(schema)

        # Format prompt with line-based schema
        prompt = MODEL_PROMPT_V3.format(
            schema=line_based_schema,
            natural_language_query=nl_query,
            additional_info=additional_info
        )

        # Prepare request payload
        payload = {
            "slot_id": 0,
            "temperature": 0.1,
            "n_keep": -1,
            "cache_prompt": True,
            "messages": [
                {
                    "role": "system",
                    "content": SYSTEM_PROMPT_V3,
                },
                {
                    "role": "user",
                    "content": prompt
                },
            ]
        }

        # Make request to llama.cpp server
        response = requests.post(LLAMA_SERVER_URL, json=payload)
        response.raise_for_status()

        # Extract output from response
        output = response.json()["choices"][0]["message"]["content"].strip()
        logger.info(f"Model output: {output}")

        # Convert line-based output to MongoDB query
        mongo_query = convert_line_parsed_to_mongo(output, schema)

        return [
            mongo_query,
            output
        ]
    except Exception as e:
        logger.error(f"Error processing query: {e}")
        error_msg = f"Error: {str(e)}"
        # BUG FIX: the interface has exactly two output components, so the
        # error path must also return two elements (previously three, which
        # breaks Gradio's output mapping on failure).
        return [error_msg, error_msg]
107
+
108
def create_interface():
    """Build the Gradio Interface wired to process_query.

    Inputs: schema JSON text, the natural-language query, and an optional
    additional-info string. Outputs: the reconstructed MongoDB query (as a
    code widget) and the raw line-based model output (as text). Four worked
    examples with progressively richer schemas are bundled in.
    """
    # Create Gradio interface
    iface = gr.Interface(
        fn=process_query,
        inputs=[
            gr.Textbox(
                label="Schema (JSON format)",
                placeholder="Enter your MongoDB schema in JSON format...",
                lines=10
            ),
            gr.Textbox(
                label="Natural Language Query",
                placeholder="Enter your query in natural language..."
            ),
            gr.Textbox(
                label="Additional Info (Optional)",
                placeholder="Enter any additional context (timestamps, etc)..."
            )
        ],
        outputs=[
            gr.Code(label="MongoDB Query", language="javascript", lines=1),
            gr.Textbox(label="Line-based Query")
        ],
        title="Natural Language to MongoDB Query Converter",
        description="Convert natural language queries to MongoDB queries based on your schema.",
        # Each example is [schema_json, nl_query, additional_info].
        examples=[
            [
                '''{
    "collections": [{
        "name": "events",
        "document": {
            "properties": {
                "timestamp": {"bsonType": "int"},
                "severity": {"bsonType": "int"},
                "location": {
                    "bsonType": "object",
                    "properties": {
                        "lat": {"bsonType": "double"},
                        "lon": {"bsonType": "double"}
                    }
                }
            }
        }
    }]}''',
                "Find all events with severity greater than 5",
                ""
            ],
            [
                '''{
    "collections": [{
        "name": "vehicles",
        "document": {
            "properties": {
                "timestamp": {"bsonType": "int"},
                "vehicle_details": {
                    "bsonType": "object",
                    "properties": {
                        "license_plate": {"bsonType": "string"},
                        "make": {"bsonType": "string"},
                        "model": {"bsonType": "string"},
                        "year": {"bsonType": "int"},
                        "color": {"bsonType": "string"}
                    }
                },
                "speed": {"bsonType": "double"},
                "location": {
                    "bsonType": "object",
                    "properties": {
                        "lat": {"bsonType": "double"},
                        "lon": {"bsonType": "double"}
                    }
                }
            }
        }
    }]}''',
                "Find red Toyota vehicles manufactured after 2020 with speed above 60",
                ""
            ],
            [
                '''{
    "collections": [{
        "name": "sensors",
        "document": {
            "properties": {
                "sensor_id": {"bsonType": "string"},
                "readings": {
                    "bsonType": "object",
                    "properties": {
                        "temperature": {"bsonType": "double"},
                        "humidity": {"bsonType": "double"},
                        "pressure": {"bsonType": "double"}
                    }
                },
                "timestamp": {"bsonType": "date"},
                "status": {"bsonType": "string"}
            }
        }
    }]}''',
                "Find active sensors with temperature above 30 degrees in the last one day",
                '''current date is 21 january 2025'''
            ],
            [
                '''{
    "collections": [{
        "name": "orders",
        "document": {
            "properties": {
                "order_id": {"bsonType": "string"},
                "customer": {
                    "bsonType": "object",
                    "properties": {
                        "id": {"bsonType": "string"},
                        "name": {"bsonType": "string"},
                        "email": {"bsonType": "string"}
                    }
                },
                "items": {
                    "bsonType": "array",
                    "items": {
                        "bsonType": "object",
                        "properties": {
                            "product_id": {"bsonType": "string"},
                            "quantity": {"bsonType": "int"},
                            "price": {"bsonType": "double"}
                        }
                    }
                },
                "total_amount": {"bsonType": "double"},
                "status": {"bsonType": "string"},
                "created_at": {"bsonType": "int"}
            }
        }
    }]}''',
                "Find orders with total amount greater than $100 that contain more than 3 items and were created in the last 24 hours",
                '''{"current_time": 1685890800, "last_24_hours": 1685804400}'''
            ]
        ]
    )
    return iface
247
+
248
+ if __name__ == "__main__":
249
+ # Download the model
250
+ download_model()
251
+
252
+ # Start the llama.cpp server
253
+ start_llama_server()
254
+
255
+ # Give the server a moment to start
256
+ import time
257
+ time.sleep(5)
258
+
259
+ # Launch the Gradio interface
260
+ print("Starting Gradio interface...")
261
+ iface = create_interface()
262
+ iface.launch()
configs/__pycache__/prompt_config.cpython-313.pyc ADDED
Binary file (1.35 kB). View file
 
configs/prompt_config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt templates (v3) for the NL -> line-based Mongo parsing model.
# SYSTEM_PROMPT_V3 pins the task and the operator vocabulary the model may
# emit; MODEL_PROMPT_V3 is filled per request via str.format with the keys
# `schema`, `natural_language_query`, and `additional_info`.
SYSTEM_PROMPT_V3 = """You are a MongoDB query parsing assistant. Your task is to convert a natural language query into a structured, line-by-line parsed format suitable for building MongoDB queries.

You will receive:
- schema: <MongoDB schema fields and their descriptions>
- natural_language_query: <A plain English query describing the intent of user.>
- additional_info: <optional context or constraints>

Your job is to extract the relevant conditions and represent them in the following parsed format:
- Each filter is on a separate line
- Use operators like:
= - equality
$gt - greater than
$lt - less than
$gte - greater than or equal to
$lte - less than or equal to
$in - inclusion list (comma-separated values)
$regex - regular expression for matching
- Optionally, include:
sort = <field_name> (ascending or descending)
limit = <number>

Follow the schema strictly. Do not hallucinate field names. Output only the parsed query format with no explanations.
"""

# Per-request user prompt; ends with "parsed_mongo_query:" so the model
# continues directly with the line-based output.
MODEL_PROMPT_V3 = """schema:
{schema}

natural_language_query: {natural_language_query}

additional_info: {additional_info}

parsed_mongo_query:"""
data_utils/__init__.py ADDED
File without changes
data_utils/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (147 Bytes). View file
 
data_utils/__pycache__/base_conversion_utils.cpython-313.pyc ADDED
Binary file (22.1 kB). View file
 
data_utils/__pycache__/line_based_parsing.cpython-313.pyc ADDED
Binary file (7.58 kB). View file
 
data_utils/__pycache__/schema_utils.cpython-313.pyc ADDED
Binary file (4.13 kB). View file
 
data_utils/base_conversion_utils.py ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, Tuple, List
2
+ from loguru import logger
3
+ import json
4
+ import re
5
+
6
+ def _normalize_number(match):
7
+ num_str = match.group(0)
8
+ if '.' in num_str:
9
+ # Normalize float by removing trailing zeros and decimal point if needed
10
+ return str(float(num_str))
11
+ return num_str # Leave integers as is
12
+
13
+
14
def clean_query(query: str) -> str:
    """Canonicalise a MongoDB query string for comparison purposes.

    Applies, in order: single->double quote replacement, space removal,
    stripping, unwrapping '''...''' fences, newline removal, dropping empty
    {} literals and .toArray() calls, and float-literal normalisation
    (trailing zeros removed).
    """
    def _canon_float(m):
        token = m.group(0)
        return str(float(token)) if '.' in token else token

    cleaned = query.replace("'", "\"").replace(" ", "").strip()
    # Unwrap a triple-quote fence if present (checked after quote replacement,
    # mirroring the historical processing order).
    if cleaned.startswith("'''") and cleaned.endswith("'''"):
        cleaned = cleaned[3:-3]
    cleaned = cleaned.replace("\n", "").replace("{}", "").replace(".toArray()", "")
    # Normalise bare float literals (not inside quoted strings / identifiers).
    return re.sub(r'(?<!["\w])(-?\d+\.\d+)(?!["\w])', _canon_float, cleaned)
44
+
45
+
46
def extract_field_paths(properties: Dict[str, Any], prefix: str = "") -> Dict[str, str]:
    """Map each leaf field name to its dotted path within a Mongo JSON-Schema
    'properties' tree.

    Descends through nested objects and arrays of objects; a leaf name that
    appears in several branches keeps the last path encountered.
    """
    result: Dict[str, str] = {}
    for name, spec in properties.items():
        path = prefix + name
        bson_type = spec.get("bsonType")
        # Nested object: recurse into its own properties.
        if bson_type == "object" and "properties" in spec:
            result.update(extract_field_paths(spec["properties"], path + "."))
            continue
        # Array of objects: recurse into the item schema's properties.
        if (
            bson_type == "array"
            and "items" in spec
            and spec["items"].get("bsonType") == "object"
            and "properties" in spec["items"]
        ):
            result.update(extract_field_paths(spec["items"]["properties"], path + "."))
            continue
        result[name] = path
    return result
65
+
66
+
67
def build_schema_maps(schema: Dict[str, Any]) -> Tuple[Dict[str, str], Dict[str, str]]:
    """Derive the bidirectional field-name <-> dotted-path maps for the first
    collection of *schema*.

    Returns (input_to_output, output_to_input). For flat schemas both maps
    are identity-like (name -> name).
    """
    first_collection = schema["collections"][0]
    forward = extract_field_paths(first_collection["document"]["properties"])
    backward = {path: name for name, path in forward.items()}
    return forward, backward
78
+
79
+
80
def set_nested(d: Dict[str, Any], keys: List[str], value: Any) -> None:
    """Store *value* at the nested position described by *keys*, creating
    intermediate dicts on demand (mutates *d* in place).
    """
    node = d
    for part in keys[:-1]:
        node = node.setdefault(part, {})
    node[keys[-1]] = value
87
+
88
+
89
def dot_notation_to_nested(dot: Dict[str, Any]) -> Dict[str, Any]:
    """Expand dot-notation keys into a nested dict.

    E.g. {"a.b": 1} -> {"a": {"b": 1}}; plain keys are kept at the top level.
    """
    nested: Dict[str, Any] = {}
    for dotted_key, value in dot.items():
        # split('.') always yields at least one part, so the unpack is safe.
        *parents, leaf = dotted_key.split('.')
        node = nested
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return nested
99
+
100
+
101
def nested_to_dot(d: Dict[str, Any], prefix: str = "") -> Dict[str, Any]:
    """Flatten a nested dict to dot-notation keys.

    A non-empty dict whose keys all start with '$' (a Mongo operator spec such
    as {"$gte": 5}) is treated as a leaf value and kept intact.
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        path = f"{prefix}.{key}" if prefix else key
        operator_leaf = (
            isinstance(value, dict)
            and bool(value)
            and all(str(op).startswith("$") for op in value)
        )
        if isinstance(value, dict) and not operator_leaf:
            flat.update(nested_to_dot(value, path))
        else:
            flat[path] = value
    return flat
116
+
117
+
118
def modified_to_actual_query(modified: Dict[str, Any],
                             input_to_output: Dict[str, str]) -> Dict[str, Any]:
    """Expand a flat {field_name: condition} dict into the nested Mongo filter
    dictated by the schema map.

    Names missing from *input_to_output* are interpreted as raw dot-notation
    paths (the fallback for unknown fields).
    """
    nested_query: Dict[str, Any] = {}
    for name, condition in modified.items():
        dotted_path = input_to_output.get(name, name)
        set_nested(nested_query, dotted_path.split('.'), condition)
    return nested_query
134
+
135
+
136
def actual_to_modified_query(actual: Dict[str, Any],
                             output_to_input: Dict[str, str]) -> Dict[str, Any]:
    """Flatten a nested Mongo filter back into {field_name: condition}.

    Operator dicts (every key starting with '$') count as leaf values; paths
    absent from *output_to_input* keep their dotted form unchanged.
    """
    flat: Dict[str, Any] = {}

    def visit(node: Any, path: str) -> None:
        operator_leaf = (
            isinstance(node, dict)
            and bool(node)
            and all(k.startswith("$") for k in node)
        )
        # Leaves: operator dicts and any non-dict value.
        if operator_leaf or not isinstance(node, dict):
            flat[output_to_input.get(path, path)] = node
            return
        # Interior node: descend, extending the dotted path.
        for key, child in node.items():
            visit(child, f"{path}.{key}" if path else key)

    visit(actual, "")
    return flat
169
+
170
+
171
def build_query_and_options(
    modified: Dict[str, Any],
    input_to_output: Dict[str, str]
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Split a flat input dict into (nested Mongo filter, options dict).

    The option keys limit/skip/sort/projection are popped off *modified*
    (mutating it) in that fixed order; the remaining entries are nested via
    the schema map.
    """
    option_keys = ("limit", "skip", "sort", "projection")
    options: Dict[str, Any] = {
        key: modified.pop(key) for key in option_keys if key in modified
    }
    nested_filter = modified_to_actual_query(modified, input_to_output)
    return nested_filter, options
190
+
191
+
192
def convert_modified_to_actual_code_string(
    modified_input: dict,
    in2out: dict,
    collection_name: str = "events"
) -> str:
    """Render a flat (modified) filter dict as a MongoDB shell code string.

    Args:
        modified_input: Flat filter plus optional limit/skip/sort/projection
            entries; keys starting with '_' are internal metadata and are
            stripped, except that '_original_query_format' selects the date
            constructor syntax (newDate vs ISODate) in the output.
        in2out: field-name -> dotted-path map from build_schema_maps().
        collection_name: Collection used in the db.<name>.find(...) call.

    Returns:
        A string like 'db.events.find({...}).sort({...}).limit(5)'.
    """
    import re

    # BUG FIX: read the original-format marker BEFORE stripping metadata keys.
    # Previously the lookup ran against the already-filtered dict, so the
    # marker was never seen and newDate(...) output could not be preserved.
    original_format = modified_input.get("_original_query_format", "")

    # Remove internal metadata fields before processing
    modified_input = {k: v for k, v in modified_input.items() if not k.startswith('_')}

    filter_dict, opts = build_query_and_options(modified_input.copy(), in2out)

    # 1) dot-ify the filter dict so it prints in db.find style
    dot_filter = nested_to_dot(filter_dict)
    filter_str = json.dumps(dot_filter, separators=(",", ":"))

    # 2) Convert ISO date strings like "2024-01-01T00:00:00Z" back into a
    # MongoDB date constructor, honouring the original query's format.
    date_pattern = r'"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)"'
    if "newDate" in original_format:
        filter_str = re.sub(date_pattern, r'newDate("\1")', filter_str)
    else:
        # Default to ISODate format
        filter_str = re.sub(date_pattern, r'ISODate("\1")', filter_str)

    # 3) Restore special time expressions that were stringified earlier
    time_expr_pattern = r'"(newDate\.getTime\(\)-\d+)"'
    filter_str = re.sub(time_expr_pattern, r'\1', filter_str)

    # 4) Only include the projection argument if it is non-empty
    projection = opts.get("projection", None)
    projection_str = ""
    if projection:
        projection_str = json.dumps(projection, separators=(',', ':'))
        # Also convert date strings inside the projection, if any
        if "newDate" in original_format:
            projection_str = re.sub(date_pattern, r'newDate("\1")', projection_str)
        else:
            projection_str = re.sub(date_pattern, r'ISODate("\1")', projection_str)

    parts = [f"db.{collection_name}.find({filter_str}"
             + (f", {projection_str}" if projection else "")
             + ")"]

    # 5) Chain the optional cursor methods
    if opts.get("sort"):
        sort_value = opts['sort']
        if isinstance(sort_value, list):
            # Convert [(key, direction), ...] array form to {key: direction}
            sort_value = {key: direction for key, direction in sort_value}
        if isinstance(sort_value, dict):
            # Emit the Mongo-style literal with unquoted direction values
            # (json.dumps would be equivalent here, but keep explicit control)
            sort_items = [f'"{k}":{v}' for k, v in sort_value.items()]
            sort_str = '{' + ','.join(sort_items) + '}'
        else:
            sort_str = str(sort_value)
        parts.append(f".sort({sort_str})")

    if opts.get("skip"):
        parts.append(f".skip({opts['skip']})")

    if opts.get("limit"):
        parts.append(f".limit({opts['limit']})")

    return "".join(parts)
274
+
275
+
276
def convert_actual_code_to_modified_dict(actual_code: str, out2in: dict) -> dict:
    """
    Converts an actual MongoDB query string into a modified flat dictionary.

    Strategy: normalise Mongo-shell syntax (ISODate/newDate, unbalanced
    braces) into JSON-ish text, then extract the find() filter, projection,
    and chained sort/skip/limit parameters via regex; if that path raises,
    fall back to parsing the expression with the ast module. The returned
    flat dict maps field names (via *out2in*) to conditions, plus any
    extracted option keys and a '_original_numbers' metadata entry recording
    the original float spellings (to preserve trailing zeros).

    WARNING: This assumes the input is sanitized and safe (e.g., evaluated from a trusted source).
    """
    import ast
    import re
    import json
    from datetime import datetime, timedelta

    # Store original number strings: str(float) -> longest original spelling
    original_numbers = {}

    def store_number_strings(s: str) -> str:
        # Side-effect-only pass: records float spellings into original_numbers
        # and returns *s* unchanged.
        def replace_number(match):
            num_str = match.group(0)
            # Only store if it has a decimal point (to preserve trailing zeros)
            if '.' in num_str:
                try:
                    num = float(num_str)
                    # Store the longest representation for this float
                    key = str(num)
                    if key not in original_numbers or len(num_str) > len(original_numbers[key]):
                        original_numbers[key] = num_str
                except ValueError:
                    pass
            return num_str

        # Match numbers with optional decimal places and trailing zeros
        number_pattern = r'-?\d+\.\d+'
        re.sub(number_pattern, replace_number, s)
        return s

    def preprocess_mongo_syntax(query_str):
        # Rewrite Mongo-shell date constructors into plain quoted strings so
        # the text can be parsed as JSON / a Python literal downstream.
        store_number_strings(query_str)

        # Replace ISODate("..."), ISODate('...') with the date string
        query_str = re.sub(r'ISODate\("([^"]+)"\)', r'"\1"', query_str)
        query_str = re.sub(r"ISODate\('([^']+)'\)", r'"\1"', query_str)

        # Handle newDate(newDate().getTime()-<expr>)
        def newdate_minus_expr(match):
            expr = match.group(1)
            try:
                # Evaluate the expression safely (only numbers and operators)
                # NOTE(review): eval with empty builtins still isn't a real
                # sandbox; the regex restricts expr to digits/operators, but
                # confirm this input is always trusted.
                ms = int(eval(expr, {"__builtins__": None}, {}))
                from datetime import datetime, timedelta
                dt = datetime.utcnow() + timedelta(milliseconds=ms)
                return '"' + dt.strftime('%Y-%m-%dT%H:%M:%SZ') + '"'
            except Exception:
                return '"1970-01-01T00:00:00Z"' # fallback
        query_str = re.sub(r'newDate\(newDate\(\)\.getTime\(\)([-+*/0-9 ]+)\)', newdate_minus_expr, query_str)

        # Replace newDate() with current UTC time
        from datetime import datetime
        now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
        query_str = re.sub(r'newDate\(\)', f'"{now}"', query_str)

        # Replace newDate(expr) with a string (handle both quote types)
        query_str = re.sub(r'newDate\("([^"]+)"\)', r'"\1"', query_str)
        query_str = re.sub(r"newDate\('([^']+)'\)", r'"\1"', query_str)
        query_str = re.sub(r'newDate\((.*?)\)', r'"\1"', query_str)

        # Fix unbalanced brackets
        if query_str.count('{') > query_str.count('}'):
            query_str += "}" * (query_str.count('{') - query_str.count('}'))
        return query_str

    # Extract filter dictionary from find() call using regex
    def extract_filter_dict(code):
        # Match db.collection.find(...) pattern; the capture stops at the
        # closing paren or at ", {" (start of a projection argument)
        find_pattern = r'db\.[^.]+\.find\((.*?)(?:\)|,\s*{)'
        find_match = re.search(find_pattern, code)
        if not find_match:
            raise ValueError("Could not extract filter parameters from find() call")

        filter_str = find_match.group(1)

        # If empty, return empty dict
        if not filter_str.strip():
            return {}

        try:
            # Try parsing as JSON
            return json.loads(filter_str)
        except json.JSONDecodeError:
            # Try with ast.literal_eval
            try:
                return ast.literal_eval(filter_str)
            except:
                # Last resort - try fixing common issues and retry
                fixed_str = filter_str.replace("'", '"')
                try:
                    return json.loads(fixed_str)
                except:
                    raise ValueError(f"Could not parse filter dictionary: {filter_str}")

    # Extract projection dictionary from find() call using regex
    def extract_projection_dict(code):
        # Match find(..., {projection}) pattern
        proj_pattern = r'find\([^{]*({[^}]*})[^{]*,\s*{([^}]*)}\s*\)'
        proj_match = re.search(proj_pattern, code)
        if not proj_match:
            return None

        # NOTE(review): group(2) excludes the surrounding braces, so the
        # json.loads below likely fails and control falls through to
        # ast.literal_eval / None — verify against real projections.
        proj_str = proj_match.group(2)
        try:
            # Try parsing as JSON
            return json.loads(proj_str.replace("'", '"'))
        except:
            # Try with ast.literal_eval
            try:
                return ast.literal_eval(proj_str)
            except:
                return None

    # Extract method parameters using regex for cases where ast.literal_eval fails
    def extract_method_params(code, method_name):
        # Look for .method_name({...}) or .method_name([...]) or .method_name(123) pattern
        pattern = fr'\.{method_name}\s*\((.*?)\)(?:\.|\s*$)'
        match = re.search(pattern, code)
        if not match:
            return None

        param_str = match.group(1).strip()

        # Empty parameter
        if not param_str:
            return None

        # Try to handle different parameter types
        try:
            # Simple number?
            if param_str.isdigit():
                return int(param_str)

            # JSON object or array?
            try:
                # Handle MongoDB format with double quotes
                return json.loads(param_str.replace("'", '"'))
            except json.JSONDecodeError:
                # If direct JSON parsing fails, try to use ast.literal_eval
                try:
                    return ast.literal_eval(param_str)
                except:
                    # Return as is if all else fails
                    return param_str
        except Exception as e:
            # Return None if all parsing fails
            logger.warning(f"Failed to parse parameter for {method_name}: {e}")
            return None

    # Pre-process the query
    preprocessed_code = preprocess_mongo_syntax(actual_code)

    try:
        # Try to use our more robust regex-based parsing first
        filter_dict = extract_filter_dict(preprocessed_code)
        projection = extract_projection_dict(preprocessed_code)

        # Handle empty projection
        options = {"projection": projection} if projection else {}

        # Extract sort, limit and skip parameters
        sort_param = extract_method_params(preprocessed_code, "sort")
        if sort_param is not None:
            options["sort"] = sort_param

        limit_param = extract_method_params(preprocessed_code, "limit")
        if limit_param is not None:
            options["limit"] = int(limit_param) if isinstance(limit_param, (int, str)) else limit_param

        skip_param = extract_method_params(preprocessed_code, "skip")
        if skip_param is not None:
            options["skip"] = int(skip_param) if isinstance(skip_param, (int, str)) else skip_param

        # Convert actual filter_dict back to modified
        flat_filter = actual_to_modified_query(filter_dict, out2in)

        # Merge projection, sort, limit into modified if relevant
        for key in ("projection", "sort", "skip", "limit"):
            if key in options and options[key] is not None:
                flat_filter[key] = options[key]

        # Add original number strings to the result
        flat_filter['_original_numbers'] = original_numbers

        return flat_filter

    except Exception as e:
        # Fall back to traditional AST-based parsing if regex fails
        try:
            node = ast.parse(preprocessed_code.strip(), mode='eval')
            # NOTE(review): when cursor methods are chained
            # (e.g. .find(...).limit(5)), node.body.func.attr is the LAST
            # method, not "find" — confirm this branch handles chained
            # queries as intended.
            if not isinstance(node.body, ast.Call) or not hasattr(node.body.func, 'attr') or node.body.func.attr != "find":
                raise ValueError("Expected .find(...) style query")

            # extract find(filter, projection)
            args = node.body.args
            filter_dict = ast.literal_eval(args[0])
            projection = ast.literal_eval(args[1]) if len(args) > 1 else None

            # extract chained methods: sort, skip, limit
            options = {"projection": projection} if projection else {}
            current = node.body
            while isinstance(current, ast.Call):
                func = current.func
                if hasattr(func, "attr"):
                    if func.attr == "sort":
                        options["sort"] = ast.literal_eval(current.args[0])
                    elif func.attr == "skip":
                        options["skip"] = ast.literal_eval(current.args[0])
                    elif func.attr == "limit":
                        options["limit"] = ast.literal_eval(current.args[0])
                current = func.value if hasattr(func, "value") else None

            # Convert actual filter_dict back to modified
            flat_filter = actual_to_modified_query(filter_dict, out2in)

            # Merge projection, sort, limit into modified if relevant
            for key in ("projection", "sort", "skip", "limit"):
                if key in options:
                    flat_filter[key] = options[key]

            return flat_filter
        except Exception as nested_e:
            raise ValueError(f"Failed to parse MongoDB query string: {e}. AST fallback also failed: {nested_e}")
502
+
503
+
504
+ # -------------------- Example Usage --------------------
505
+ if __name__ == "__main__":
506
+ # Example JSON Schema
507
+ schema = {
508
+ "collections": [{
509
+ "name": "events",
510
+ "document": {
511
+ "properties": {
512
+ "event_id": {"bsonType": "int"},
513
+ "timestamp": {"bsonType": "int"},
514
+ "severity_level": {"bsonType": "int"},
515
+ "camera_id": {"bsonType": "int"},
516
+ "vehicle_details": {"bsonType": "object", "properties": {
517
+ "license_plate_number": {"bsonType": "string"},
518
+ "vehicle_type": {"bsonType": "string"},
519
+ "color": {"bsonType": "string"}
520
+ }},
521
+ "person_details": {"bsonType": "object", "properties": {
522
+ "match_id": {"bsonType": "int"},
523
+ "age": {"bsonType": "int"},
524
+ "gender": {"bsonType": "string"},
525
+ "clothing_description": {"bsonType": "string"}
526
+ }},
527
+ "location": {"bsonType": "object", "properties": {
528
+ "latitude": {"bsonType": "double"},
529
+ "longitude": {"bsonType": "double"}
530
+ }},
531
+ "sensor_readings": {"bsonType": "object", "properties": {
532
+ "temperature": {"bsonType": "double"},
533
+ "speed": {"bsonType": "double"},
534
+ "distance": {"bsonType": "double"}
535
+ }},
536
+ "incident_type": {"bsonType": "string"}
537
+ }
538
+ }
539
+ }],
540
+ "version": 1
541
+ }
542
+
543
+ # Build mappings once
544
+ in2out, out2in = build_schema_maps(schema)
545
+
546
+ # Flat user input including filters + options
547
+ modified_input = {
548
+ "license_plate_number": {"$regex": "^MH12"},
549
+ "timestamp": {"$gte": 1684080000, "$lte": 1684166400},
550
+ "severity_level": 3,
551
+ "limit": 50,
552
+ "skip": 10,
553
+ "sort": [("timestamp", -1)],
554
+ "projection": {
555
+ "vehicle_details.license_plate_number": 1,
556
+ "timestamp": 1,
557
+ "_id": 0
558
+ }
559
+ }
560
+
561
+ # Build actual nested query + options
562
+ filter_dict, opts = build_query_and_options(modified_input.copy(), in2out)
563
+
564
+ print("filter_dict =", filter_dict)
565
+ print("options =", opts)
566
+ # You can then do:
567
+ # cursor = (
568
+ # db.events.find(filter_dict, opts.get("projection"))
569
+ # .sort(opts.get("sort", []))
570
+ # .skip(opts.get("skip", 0))
571
+ # .limit(opts.get("limit", 0))
572
+ # )
data_utils/line_based_parsing.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ import ast
3
+
4
+ from typing import Any, Dict
5
+
6
def clean_modified_dict(modified_dict: Dict[str, Any]) -> Dict[str, Any]:
    """
    Drop entries from *modified_dict* whose values carry no information.

    Removed: None, empty string '', empty list [], empty dict {}.
    Kept: every other value, including falsy ones such as 0 and False.
    """
    kept: Dict[str, Any] = {}
    for key, val in modified_dict.items():
        # Deliberate equality checks: 0 and False do NOT match '' or [].
        meaningless = (
            val is None
            or val == ''
            or val == []
            or (isinstance(val, dict) and not val)
        )
        if not meaningless:
            kept[key] = val
    return kept
19
+
20
+
21
+
22
def convert_to_lines(query_dict):
    """
    Serialize a flat Mongo-style query dict into the line-based text format.

    Each entry becomes one line: either "field = value" for direct equality,
    or "field $op value" for every operator in a condition dict. Strings are
    single-quoted; whole-number floats are printed as ints; operator lists use
    repr() except under $ne, which joins items with commas.
    """
    def scalar(val):
        # Shared scalar rendering for operator values.
        if isinstance(val, str):
            return f"'{val}'"
        if isinstance(val, float) and val.is_integer():
            return str(int(val))
        return str(val)

    rendered = []
    for field, condition in query_dict.items():
        if not isinstance(condition, dict):
            # Direct equality: only strings get quoting, everything else is str().
            rhs = f"'{condition}'" if isinstance(condition, str) else str(condition)
            rendered.append(f"{field} = {rhs}")
            continue
        for op, val in condition.items():
            if op in ('$ne', 'ne'):
                # $ne keeps literal markers for empty values and a compact
                # comma-joined form for lists.
                if val == '':
                    rhs = "''"
                elif val == []:
                    rhs = '[]'
                elif isinstance(val, list):
                    rhs = ','.join(map(str, val))
                else:
                    rhs = scalar(val)
            elif isinstance(val, list):
                # Other operators emit lists as valid Python literals.
                rhs = repr(val)
            else:
                rhs = scalar(val)
            rendered.append(f"{field} {op} {rhs}")
    return '\n'.join(rendered)
54
+
55
+
56
def parse_line_based_query(lines):
    """
    Parse the line-based query text (one "field op value" or "field = value"
    per line) back into a Mongo-style query dict.

    Inverse of convert_to_lines. Fields named sort/order_by, limit/skip/offset
    and _original_numbers receive special handling; all other operators are
    normalized to a leading '$' and grouped per field into a condition dict.

    Raises ValueError when a field gets both a direct value and an operator.
    """
    query = {}
    for line in lines.strip().split('\n'):
        if not line.strip():
            continue
        # At most 3 parts: field, operator, and the untouched remainder as value.
        parts = line.split(maxsplit=2)
        if len(parts) < 3:
            # If operator is present but value is empty, set value to empty string
            if len(parts) == 2:
                field, operator = parts
                value = ''
            else:
                continue  # Skip invalid lines
        else:
            field, operator, value = parts

        # Special handling for sort, limit, skip, etc.
        if field in {"sort", "order_by"}:
            # Handle both 'sort field value' and 'sort = {field: value}'
            if operator == "=":
                query[field] = _convert_value(value)
            else:
                # Here "operator" is actually the sort key name.
                if field not in query:
                    query[field] = {}
                query[field][operator] = _convert_value(value)
            continue
        if field in {"limit", "skip", "offset"}:
            query[field] = _convert_value(value)
            continue
        # Special handling for _original_numbers (parse value as string if quoted, else as number)
        if field == "_original_numbers":
            if field not in query:
                query[field] = {}
            v = value.strip()
            if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
                query[field][operator] = v[1:-1]
            else:
                try:
                    # Try to parse as int or float
                    query[field][operator] = int(v)
                except ValueError:
                    try:
                        query[field][operator] = float(v)
                    except ValueError:
                        query[field][operator] = v
            continue

        # Handle equality operator
        if operator == "=":
            query[field] = _convert_value(value)
            continue

        # Handle other operators
        # If operator is $in, $nin, $all and value is empty, use []
        empty_list_ops = {'in', '$in', 'nin', '$nin', 'all', '$all'}
        # Normalize bare operator names to their '$'-prefixed Mongo form.
        op_key = operator if operator.startswith('$') else f'${operator}'
        if operator in empty_list_ops and value == '':
            value_obj = []
        elif operator in {'ne', '$ne'}:
            # $ne round-trips the literal markers emitted by convert_to_lines.
            if value.strip() == '[]':
                value_obj = []
            elif value.strip() == "''" or value.strip() == '""':
                value_obj = ''
            elif value == '':
                value_obj = []
            else:
                value_obj = _convert_value(value, operator)
        else:
            value_obj = _convert_value(value, operator)
        if field in query:
            if isinstance(query[field], dict):
                query[field][op_key] = value_obj
            else:
                raise ValueError(f"Conflict in {field}: direct value and operator")
        else:
            query[field] = {op_key: value_obj}
    return query
133
+
134
+ def _convert_value(value_str, operator=None):
135
+ """Convert string values to appropriate types"""
136
+ # Handle lists for $in and $all operators
137
+ if operator in ('in', '$in', 'all', '$all'):
138
+ s = value_str.strip()
139
+ if s.startswith('[') and s.endswith(']'):
140
+ try:
141
+ return ast.literal_eval(s)
142
+ except Exception:
143
+ pass
144
+ if ',' in value_str:
145
+ return [_parse_single_value(v) for v in value_str.split(',')]
146
+
147
+ # Handle regex flags (e.g., "pattern i" → "pattern" with $options: 'i')
148
+ if operator == 'regex' and ' ' in value_str:
149
+ pattern, *flags = value_str.split()
150
+ return {'$regex': pattern, '$options': ''.join(flags)}
151
+
152
+ return _parse_single_value(value_str)
153
+
154
+ def _parse_single_value(s):
155
+ """Convert individual values to int/float/string/dict/bool"""
156
+ s = s.strip()
157
+ # Remove surrounding quotes if present
158
+ if (s.startswith("'") and s.endswith("'")) or (s.startswith('"') and s.endswith('"')):
159
+ return s[1:-1].strip() # Always return as string if quoted
160
+ # Handle None
161
+ if s == 'None':
162
+ return None
163
+ # Try to parse as dict if it looks like one
164
+ if (s.startswith('{') and s.endswith('}')) or (s.startswith('[') and s.endswith(']')):
165
+ try:
166
+ return ast.literal_eval(s)
167
+ except Exception:
168
+ pass
169
+ # Handle booleans
170
+ if s.lower() == 'true':
171
+ return True
172
+ if s.lower() == 'false':
173
+ return False
174
+ try:
175
+ return int(s)
176
+ except ValueError:
177
+ try:
178
+ return float(s)
179
+ except ValueError:
180
+ return s
data_utils/schema_utils.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+
3
+
4
def schema_to_line_based(schema: dict) -> str:
    """
    Render a MongoDB-style JSON schema as one line per field:

        field // description (type, format)

    Only the first collection is used. Nested object/array fields are
    flattened without a parent prefix (e.g. 'age', not 'involved_persons.age').
    Returns '' when the schema has no collections.
    """
    def type_of(info: dict) -> str:
        # "bsonType" wins over "type"; empty string when neither is present.
        return info.get("bsonType") or info.get("type") or ""

    def walk(props: dict, out: list) -> None:
        for name, info in props.items():
            kind = type_of(info)

            # Type/format annotation appended to the description.
            annotation = kind
            fmt = info.get("format", "")
            if fmt:
                annotation += f", {fmt}"

            text = info.get("description", "").strip()
            if annotation:
                text = f"{text} ({annotation})" if text else f"({annotation})"

            out.append(f"{name} // {text}" if text else name)

            # Flatten nested structures: child fields appear as plain lines.
            if kind == "object" and "properties" in info:
                walk(info["properties"], out)
            elif kind == "array" and "items" in info:
                items = info["items"]
                if type_of(items) == "object" and "properties" in items:
                    walk(items["properties"], out)

    collections = schema.get("collections", [])
    if not collections:
        return ""
    first = collections[0]
    # Support both {"document": {"properties": ...}} and direct "properties".
    doc = first.get("document", {})
    props = doc["properties"] if "properties" in doc else first.get("properties", {})

    out_lines: list = []
    walk(props, out_lines)
    return "\n".join(out_lines)
54
+
55
+
56
if __name__ == "__main__":
    # Smoke test: render an example surveillance-event schema in the
    # line-based form and print it for visual inspection.
    example_schema = {
        "collections": [
            {
                "name": "events",
                "document": {
                    "bsonType": "object",
                    "properties": {
                        "identifier": {
                            "bsonType": "object",
                            "properties": {
                                "camgroup_id": {
                                    "bsonType": "string",
                                    "description": "Use this to filter events by group"
                                },
                                "task_id": {
                                    "bsonType": "string",
                                    "description": "Use this to filter events by tasks"
                                },
                                "camera_id": {
                                    "bsonType": "string",
                                    "description": "Use this to filter events by camera"
                                }
                            }
                        },
                        "response": {
                            "bsonType": "object",
                            "properties": {
                                "event": {
                                    "bsonType": "object",
                                    "properties": {
                                        "severity": {
                                            "bsonType": "string",
                                            "description": "Can be Low, Medium, Critical"
                                        },
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of the event. Use this to filter events of person and vehicle"
                                        },
                                        "blobs": {
                                            "bsonType": "array",
                                            "items": {
                                                "bsonType": "object",
                                                "properties": {
                                                    "url": {
                                                        "bsonType": "string"
                                                    },
                                                    "attribs": {
                                                        "bsonType": "object",
                                                        "description": "Use this for attributes like Gender (Only Male, Female), Upper Clothing, Lower Clothing, Age (Ranges like 20-30, 30-40 and so on) for people and Make (like maruti suzuki, toyota, tata), Color, Type (like Hatchback, sedan, xuv), label (like car, truck, van, three wheeler, motorcycle) for Vehicles"
                                                    },
                                                    "label": {
                                                        "bsonType": "string",
                                                        "description": "Use this label for number plate"
                                                    },
                                                    "score": {
                                                        "bsonType": "number",
                                                        "description": "Use this for confidence for the blob"
                                                    },
                                                    "match_id": {
                                                        "bsonType": "string",
                                                        "description": "Use this match_id for name of the person"
                                                    },
                                                    "severity": {
                                                        "bsonType": "string"
                                                    },
                                                    "subclass": {
                                                        "bsonType": "string",
                                                        "description": "Use this for subclass for the blob"
                                                    }
                                                }
                                            }
                                        },
                                        "c_timestamp": {
                                            "bsonType": "date",
                                            "description": "Use this for timestamp"
                                        },
                                        "label": {
                                            "bsonType": "string",
                                            "description": "Use this label for number plate"
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        ],
        "version": 1
    }

    # Expect one "field // description (type)" line per field, nested fields
    # flattened without a parent prefix.
    parsed_schema = schema_to_line_based(example_schema)
    print(parsed_schema)
data_utils/utils.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing_extensions import Any, List, Dict
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ from .base_conversion_utils import (
6
+ clean_query,
7
+ build_schema_maps,
8
+ convert_actual_code_to_modified_dict,
9
+ convert_modified_to_actual_code_string
10
+ )
11
+ from .line_based_parsing import (
12
+ clean_modified_dict,
13
+ convert_to_lines,
14
+ parse_line_based_query
15
+ )
16
+ from .schema_utils import schema_to_line_based
17
+
18
+
19
def modify_single_row_base_form(mongo_query: str, schema: Dict[str, Any]) -> str:
    """
    Round-trip one MongoDB query through the modified (schema-mapped) form.

    Cleans the query, builds the schema name maps, converts to the modified
    dict, reconstructs the query string, and accepts the row only if the
    reconstruction matches the cleaned original exactly.

    Returns (query, modified_query, collection_name, in2out, out2in, schema)
    on success, or six Nones when the round trip fails or raises.
    """
    failure = (None, None, None, None, None, None)
    try:
        cleaned = clean_query(mongo_query)
        in2out, out2in = build_schema_maps(schema)
        modified = convert_actual_code_to_modified_dict(cleaned, out2in)
        collection = schema["collections"][0]["name"]
        rebuilt = clean_query(
            convert_modified_to_actual_code_string(modified, in2out, collection)
        )
        if rebuilt == cleaned:
            return cleaned, modified, collection, in2out, out2in, schema
        return failure
    except Exception:
        # Any parse/conversion failure marks this row as unusable.
        return failure
42
+
43
+
44
def modify_all_rows_base_from(mongo_queries: List[str], schemas: List[Dict[str, Any]], nl_queries: List[str], additional_infos: List[str]) -> List[Dict[str, Any]]:
    """
    Run modify_single_row_base_form over every (query, schema) pair and keep
    only the rows that survive the round-trip check, bundling each survivor
    with its NL query, additional info, collection name, and schema maps.
    """
    kept: List[Dict[str, Any]] = []
    rows = zip(mongo_queries, schemas)
    for idx, (raw_query, raw_schema) in tqdm(enumerate(rows), total=len(mongo_queries), desc="Modifying Queries"):
        query, modified, collection, in2out, out2in, schema = modify_single_row_base_form(raw_query, raw_schema)
        if modified is None:
            continue
        kept.append({
            "mongo_query": query,
            "natural_language_query": nl_queries[idx],
            "additional_info": additional_infos[idx],
            "modified_query": modified,
            "collection_name": collection,
            "in2out": in2out,
            "out2in": out2in,
            "schema": schema,
        })
    return kept
63
+
64
+
65
def modify_line_based_parsing(modified_query_data: str) -> Dict[str, Any]:
    """
    Verify that a row's modified query survives the line-based round trip.

    On success the line representation is attached under 'line_based_query'
    and the (mutated) row dict is returned; on mismatch or any exception the
    row is dropped by returning None.
    """
    try:
        cleaned = clean_modified_dict(modified_query_data["modified_query"])
        as_lines = convert_to_lines(cleaned)
        if parse_line_based_query(as_lines) != cleaned:
            return None
        modified_query_data["line_based_query"] = as_lines
        return modified_query_data
    except Exception:
        return None
80
+
81
+
82
def modify_all_line_based_parsing(modified_queries: List[Dict[str, Any]]):
    """
    Apply the line-based round-trip check to every modified query, keeping
    only the rows for which parsing succeeded.
    """
    survivors = []
    for entry in tqdm(modified_queries, desc="Testing Line-based Parsing", total=len(modified_queries)):
        checked = modify_line_based_parsing(entry)
        if checked:
            survivors.append(checked)
    return survivors
92
+
93
+
94
def modify_all_schema(query_data: List[Dict[str, Any]]) -> List[str]:
    """
    Attach a line-based rendering of each row's schema under
    'line_based_schema'.

    Unlike the earlier pipeline stages, this keeps every row — even when the
    rendered schema is an empty string. (Cleaned up: the previous version
    carried commented-out try/except scaffolding that never ran.)
    """
    final_data = []
    for query in tqdm(query_data, desc="Converting Schemas to Line-based Format", total=len(query_data)):
        query["line_based_schema"] = schema_to_line_based(query["schema"])
        final_data.append(query)
    return final_data
109
+
110
+
111
def load_csv(file_path: str) -> pd.DataFrame:
    """
    Read *file_path* into a pandas DataFrame.

    Logs success at info level; on failure logs the error and re-raises.
    """
    try:
        frame = pd.read_csv(file_path)
        logger.info(f"Loaded CSV file: {file_path}")
    except Exception as e:
        logger.error(f"Error loading CSV file: {e}")
        raise e
    return frame
122
+
123
+
124
def modify_dataframe(df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Run the full dataset pipeline over *df*:

      1. round-trip each Mongo query through the modified representation,
      2. round-trip the modified queries through the line-based format,
      3. attach line-based schema renderings.

    Returns the surviving rows as a list of dicts (the previous annotation
    said DataFrame, but the function has always returned a list).
    """
    import ast  # local: only needed to parse the stringified schema column

    logger.info("Modifying DataFrame...")
    logger.debug(f"input DataFrame length: {len(df)}")
    mongo_queries = df["mongo_query"].tolist()
    # Security fix: ast.literal_eval only parses Python literals, whereas the
    # previous eval() would execute arbitrary expressions embedded in the CSV.
    schemas = df["schema"].apply(ast.literal_eval).tolist()
    nl_queries = df["natural_language_query"].tolist()
    additional_infos = df["additional_info"].tolist()
    modified_queries = modify_all_rows_base_from(mongo_queries, schemas, nl_queries, additional_infos)
    logger.debug(f"Modified queries length: {len(modified_queries)}")
    line_based_queries = modify_all_line_based_parsing(modified_queries)
    logger.debug(f"Line-based queries length: {len(line_based_queries)}")
    final_data = modify_all_schema(line_based_queries)
    logger.debug(f"Modified schemas length: {len(final_data)}")
    return final_data
141
+
142
def main(final_data: List[Dict[str, Any]]):
    """
    Sanity check: reconstruct the original Mongo query from each row's
    line-based query (line-based -> modified dict -> actual code string) and
    log full diagnostics for the first mismatch found.
    """
    # try reconstructing original query from line-based query
    for i in range(len(final_data)):
        # NOTE(review): despite the name, indices in index_allowed are SKIPPED,
        # not allowed — presumably a known-bad row; confirm intent.
        index_allowed = [746]
        if i in index_allowed:
            continue
        original_query = final_data[i]["mongo_query"]
        line_based_query = final_data[i]["line_based_query"]
        # reconstructed modified query
        reconstructed_modified_query = parse_line_based_query(line_based_query)
        # reconstructed original query
        reconstructed_original_query = convert_modified_to_actual_code_string(reconstructed_modified_query, final_data[i]["in2out"], final_data[i]["collection_name"])
        if original_query != clean_query(reconstructed_original_query):

            logger.error(f"index: {i}")
            logger.error(f"Original query: {original_query}")
            logger.error(f"Reconstructed original query: {reconstructed_original_query}")
            logger.error(f"Modified query: {final_data[i]['modified_query']}")
            logger.error(f"Reconstructed modified query: {reconstructed_modified_query}")
            logger.error(f"Line-based query: {line_based_query}")
            # logger.error(f"Schema: {final_data[i]['schema']}")
            logger.warning("--------------------------------------------------")
            # This assert necessarily fails inside this branch, so the run stops
            # with an AssertionError; exit(0) is effectively unreachable.
            assert original_query == clean_query(reconstructed_original_query), f"Original query does not match reconstructed original query at index {i}"
            exit(0)
167
+
168
if __name__ == "__main__":
    # Entry point: run the full pipeline over the v2 CSV and log a few
    # sample rows for manual inspection.
    pdf_path = "./data_v3/data_v2.csv"
    df = load_csv(pdf_path)
    final_data = modify_dataframe(df)
    # main(final_data)
    logger.info(f"Final data length: {len(final_data)}")
    logger.debug(f"Final data type: {final_data[0]}\n\n")

    # Show the first few processed rows (stops after index 4).
    for i, (query_data) in enumerate(final_data):
        logger.debug(f"Modified schema {i}: {query_data['line_based_schema']}")
        logger.debug(f"Line-based query {i}: {query_data['line_based_query']}")
        logger.debug(f"NL query {i}: {query_data['natural_language_query']}")
        logger.debug(f"Additional info {i}: {query_data['additional_info']}")
        print('\n\n\n\n')
        if i > 3:
            break
requirements.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles>=23.0
2
+ annotated-types>=0.5.0
3
+ anyio>=3.7.1
4
+ certifi>=2023.11.17
5
+ charset-normalizer==3.4.2
6
+ click==8.2.1
7
+ diskcache==5.6.3
8
+ fastapi==0.115.12
9
+ ffmpy==0.6.0
10
+ filelock==3.18.0
11
+ fsspec==2025.5.1
12
+ gradio==5.32.1
13
+ gradio_client==1.10.2
14
+ groovy==0.1.2
15
+ h11==0.16.0
16
+ hf-xet==1.1.3
17
+ httpcore==1.0.9
18
+ httpx==0.28.1
19
+ huggingface-hub==0.32.4
20
+ idna==3.10
21
+ Jinja2==3.1.6
22
+ llama_cpp_python==0.3.9
23
+ loguru==0.7.3
24
+ markdown-it-py==3.0.0
25
+ MarkupSafe==3.0.2
26
+ mdurl==0.1.2
27
+ numpy==2.2.6
28
+ orjson==3.10.18
29
+ packaging==25.0
30
+ pandas==2.2.3
31
+ pillow==11.2.1
32
+ pydantic==2.11.5
33
+ pydantic-settings==2.9.1
34
+ pydantic_core==2.33.2
35
+ pydub==0.25.1
36
+ Pygments==2.19.1
37
+ python-dateutil==2.9.0.post0
38
+ python-dotenv==1.1.0
39
+ python-multipart==0.0.20
40
+ pytz==2025.2
41
+ PyYAML==6.0.2
42
+ requests==2.32.3
43
+ rich==14.0.0
44
+ ruff==0.11.12
45
+ safehttpx==0.1.6
46
+ semantic-version==2.10.0
47
+ shellingham==1.5.4
48
+ six==1.17.0
49
+ sniffio==1.3.1
50
+ sse-starlette==2.3.6
51
+ starlette==0.46.2
52
+ starlette-context==0.4.0
53
+ tomlkit==0.13.2
54
+ tqdm==4.67.1
55
+ typer==0.16.0
56
+ typing-inspection==0.4.1
57
+ typing_extensions==4.14.0
58
+ tzdata==2025.2
59
+ urllib3==2.4.0
60
+ uvicorn==0.34.3
61
+ websockets==15.0.1
62
+ wget