Update app.py

app.py CHANGED
@@ -4,10 +4,9 @@ from parser import parse_python_code
 import os
 import json
 import io
-from database import init_chromadb, populate_sample_db, store_program, query_programs, load_chromadb_from_hf, HF_DATASET_NAME, HF_TOKEN
+from database import init_chromadb, populate_sample_db, store_program, query_programs, load_chromadb_from_hf, HF_DATASET_NAME, HF_TOKEN, DB_NAME
 
 # User-configurable variables
-DB_NAME = "python_programs"  # ChromaDB collection name
 UPLOAD_DIR = "./uploads"  # Directory for uploads
 
 app = Flask(__name__)
@@ -50,7 +49,7 @@ def index():
             client = init_chromadb()
             store_program(client, code_input, sequence, vectors, DB_NAME)
         elif 'query_ops' in request.form and request.form['query_ops'].strip():
-            # Handle query for operations
+            # Handle query for operations (category sequence)
            operations = [op.strip() for op in request.form['query_ops'].split(',')]
             client = load_chromadb_from_hf(HF_DATASET_NAME, HF_TOKEN)  # Load from Hugging Face
             query_results = query_programs(client, operations, DB_NAME)
@@ -62,6 +61,19 @@ def index():
                 code_input=None,
                 query_results=query_results
             )
+        elif 'semantic_query' in request.form and request.form['semantic_query'].strip():
+            # Handle semantic query (natural language description)
+            semantic_query = request.form['semantic_query']
+            client = load_chromadb_from_hf(HF_DATASET_NAME, HF_TOKEN)  # Load from Hugging Face
+            query_results = query_programs(client, None, DB_NAME, semantic_query=semantic_query)
+            return render_template(
+                'results_partial.html',
+                parts=None,
+                filename=filename,
+                reconstructed_code=None,
+                code_input=None,
+                query_results=query_results
+            )
 
     if parts:
         indexed_parts = [{'index': i + 1, **part} for i, part in enumerate(parts)]
@@ -89,7 +101,8 @@ def index():
 @app.route('/export_json', methods=['POST'])
 def export_json():
     parts = request.json.get('parts', [])
-    export_data = [{'vector': part['vector'], 'source': part['source']} for part in parts]
+    export_data = [{'vector': part['vector'], 'source': part['source'], 'description': generate_description_tokens([part['category']], [part['vector']])}
+                   for part in parts]
     json_str = json.dumps(export_data, indent=2)
     buffer = io.BytesIO(json_str.encode('utf-8'))
     buffer.seek(0)
@@ -100,6 +113,31 @@ def export_json():
         mimetype='application/json'
     )
 
+def generate_description_tokens(sequence, vectors):
+    """Generate semantic description tokens for a program based on its sequence and vectors."""
+    tokens = []
+    category_descriptions = {
+        'import': 'imports module',
+        'function': 'defines function',
+        'assigned_variable': 'assigns variable',
+        'input_variable': 'input parameter',
+        'returned_variable': 'returns value',
+        'if': 'conditional statement',
+        'return': 'returns result',
+        'try': 'try block',
+        'except': 'exception handler',
+        'expression': 'expression statement',
+        'spacer': 'empty line or comment'
+    }
+
+    for cat, vec in zip(sequence, vectors):
+        if cat in category_descriptions:
+            tokens.append(f"{category_descriptions[cat]}:{cat}")
+        # Add vector-derived features (e.g., level, span) as tokens
+        tokens.append(f"level:{vec[1]}")
+        tokens.append(f"span:{vec[3]:.2f}")
+    return " ".join(tokens)
+
 if __name__ == '__main__':
     if not os.path.exists(UPLOAD_DIR):
         os.makedirs(UPLOAD_DIR)
|