Spaces:
Running
Running
File size: 5,581 Bytes
0a65f9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
from typing import Dict, Any
def schema_to_line_based(schema: dict) -> str:
    """
    Convert a schema dictionary to a line-based text format.

    Each field of the first entry in ``schema["collections"]`` becomes one
    line of the form::

        field // description (type, format)

    Parts that are missing are left out; a field with no description, type
    or format is emitted as the bare field name. Nested object fields, and
    object fields inside array ``items``, are listed directly after their
    parent using only their own name (e.g. ``age`` rather than
    ``involved_persons.age``).

    The type is read from ``bsonType`` (preferred) or ``type``. Either may be
    a single string or a list of strings; a list is rendered joined with
    ``|`` (e.g. ``date|null``). The input schema is never modified.

    Args:
        schema: Dictionary with a ``"collections"`` list. The first collection
            may hold its fields under ``document.properties`` or directly
            under ``properties``.

    Returns:
        The generated lines joined with newlines, or ``""`` when there is no
        collection or no properties to describe.
    """

    def get_types(info: dict) -> list:
        """Return the declared type(s) of *info* as a fresh list of strings."""
        raw = info.get("bsonType") or info.get("type")
        if not raw:
            return []
        if isinstance(raw, (list, tuple)):
            # Copy instead of aliasing so later string building can never
            # mutate the caller's schema.
            return [str(t) for t in raw]
        return [str(raw)]

    def object_properties(info):
        """Return the ``properties`` dict of an object schema, else ``None``."""
        if not isinstance(info, dict) or "object" not in get_types(info):
            return None
        props = info.get("properties")
        return props if isinstance(props, dict) else None

    def process_properties(properties) -> list:
        """Return the output lines for *properties* and everything nested in it."""
        if not isinstance(properties, dict):
            return []
        lines = []
        for field, info in properties.items():
            if not isinstance(info, dict):
                # Malformed entry: keep the field visible, but there is
                # nothing to describe or descend into.
                lines.append(f"{field}")
                continue

            types = get_types(info)
            fmt = info.get("format") or ""
            # Join only the non-empty parts so a format without a type does
            # not render as "(, fmt)".
            type_fmt = ", ".join(
                part for part in ("|".join(types), f"{fmt}") if part
            )

            comment = str(info.get("description") or "").strip()
            if type_fmt:
                comment = f"{comment} ({type_fmt})" if comment else f"({type_fmt})"
            lines.append(f"{field} // {comment}" if comment else f"{field}")

            # Descend into nested objects and arrays of objects; children are
            # listed by their own name only, without the parent prefix.
            nested = object_properties(info)
            if nested is not None:
                lines.extend(process_properties(nested))
            elif "array" in types and "items" in info:
                items = info["items"]
                # "items" may be one schema or a list of schemas.
                candidates = items if isinstance(items, (list, tuple)) else [items]
                for item in candidates:
                    item_props = object_properties(item)
                    if item_props is not None:
                        lines.extend(process_properties(item_props))
        return lines

    collections = schema.get("collections") or []
    if not collections:
        return ""
    # Only the first collection is rendered.
    collection = collections[0]
    if not isinstance(collection, dict):
        return ""

    # Support both "document" -> "properties" and direct "properties".
    document = collection.get("document")
    if isinstance(document, dict) and "properties" in document:
        properties = document["properties"]
    else:
        properties = collection.get("properties", {})
    return "\n".join(process_properties(properties))
if __name__ == "__main__":
    # Demo: build a sample "events" collection schema piece by piece, then
    # print its line-based rendering.

    # Fields under "identifier".
    identifier_fields = {
        "camgroup_id": {
            "bsonType": "string",
            "description": "Use this to filter events by group",
        },
        "task_id": {
            "bsonType": "string",
            "description": "Use this to filter events by tasks",
        },
        "camera_id": {
            "bsonType": "string",
            "description": "Use this to filter events by camera",
        },
    }

    # Fields of each element of the "blobs" array.
    blob_fields = {
        "url": {"bsonType": "string"},
        "attribs": {
            "bsonType": "object",
            "description": "Use this for attributes like Gender (Only Male, Female), Upper Clothing, Lower Clothing, Age (Ranges like 20-30, 30-40 and so on) for people and Make (like maruti suzuki, toyota, tata), Color, Type (like Hatchback, sedan, xuv), label (like car, truck, van, three wheeler, motorcycle) for Vehicles",
        },
        "label": {
            "bsonType": "string",
            "description": "Use this label for number plate",
        },
        "score": {
            "bsonType": "number",
            "description": "Use this for confidence for the blob",
        },
        "match_id": {
            "bsonType": "string",
            "description": "Use this match_id for name of the person",
        },
        "severity": {"bsonType": "string"},
        "subclass": {
            "bsonType": "string",
            "description": "Use this for subclass for the blob",
        },
    }

    # Fields under "response.event".
    event_fields = {
        "severity": {
            "bsonType": "string",
            "description": "Can be Low, Medium, Critical",
        },
        "type": {
            "bsonType": "string",
            "description": "Type of the event. Use this to filter events of person and vehicle",
        },
        "blobs": {
            "bsonType": "array",
            "items": {"bsonType": "object", "properties": blob_fields},
        },
        "c_timestamp": {
            "bsonType": "date",
            "description": "Use this for timestamp",
        },
        "label": {
            "bsonType": "string",
            "description": "Use this label for number plate",
        },
    }

    # Top-level document of the "events" collection.
    events_document = {
        "bsonType": "object",
        "properties": {
            "identifier": {
                "bsonType": "object",
                "properties": identifier_fields,
            },
            "response": {
                "bsonType": "object",
                "properties": {
                    "event": {
                        "bsonType": "object",
                        "properties": event_fields,
                    },
                },
            },
        },
    }

    sample_schema = {
        "collections": [{"name": "events", "document": events_document}],
        "version": 1,
    }

    print(schema_to_line_based(sample_schema))
|