# app.py
from flask import Flask, request, render_template, jsonify, send_file | |
from parser import parse_python_code | |
import os | |
import json | |
import io | |
from database import init_chromadb, populate_sample_db, store_program, query_programs, load_chromadb_from_hf, HF_DATASET_NAME, HF_TOKEN | |
# --- User-configurable settings ---------------------------------------------

# ChromaDB collection name used when storing and querying programs.
DB_NAME = "python_programs"

# Directory that uploaded files are written into.
UPLOAD_DIR = "./uploads"

# Flask application object for this module.
app = Flask(__name__)
def reconstruct_code(parts):
    """Rebuild source text by concatenating parsed parts in location order.

    Args:
        parts: Iterable of dicts. Each dict needs a ``'location'`` sequence
            (its first item is the sort key) and a ``'source'`` string.

    Returns:
        The ``'source'`` strings joined with no separator, ordered by
        ``location[0]`` ascending.
    """
    ordered = list(parts)
    # list.sort is stable, so parts sharing a start position keep input order.
    ordered.sort(key=lambda piece: piece['location'][0])
    fragments = [piece['source'] for piece in ordered]
    return ''.join(fragments)
def _parse_and_store(code_input):
    """Parse *code_input*, store it in the ChromaDB collection, return its parts."""
    parts, sequence = parse_python_code(code_input)
    vectors = [part['vector'] for part in parts]
    client = init_chromadb()
    store_program(client, code_input, sequence, vectors, DB_NAME)
    return parts


def index():
    """Serve the main page and handle upload, pasted-code, and query requests.

    On POST, exactly one of three inputs is handled, in this priority order:

    1. An uploaded ``file`` (must end in ``.py``): saved under ``UPLOAD_DIR``,
       parsed, and stored.
    2. Pasted ``code`` (with optional ``filename``): parsed and stored.
    3. ``query_ops``: a comma-separated list of operations used to query
       stored programs.

    On any other method, the database is loaded and seeded with sample data
    when the ``DB_NAME`` collection is missing, then the index page is
    rendered.

    Returns:
        Rendered ``results_partial.html`` for a successful POST, rendered
        ``index.html`` for the initial page load, or a ``(message, 400)``
        tuple when the upload is not a ``.py`` file or nothing usable was
        submitted.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm it is registered with ``app`` for both GET and POST.
    """
    if request.method == 'POST':
        parts = None
        filename = 'unnamed.py'
        code_input = None

        uploaded = request.files.get('file')
        if uploaded is not None and uploaded.filename:
            if not uploaded.filename.endswith('.py'):
                return 'Invalid file type. Please upload a Python file.', 400
            # The client controls the filename: keep only its final path
            # component so it cannot escape UPLOAD_DIR via "../" or an
            # absolute path (backslashes normalised for Windows-style names).
            filename = os.path.basename(uploaded.filename.replace('\\', '/'))
            # The directory is otherwise only created under __main__, which
            # does not run when the app is served by a WSGI server.
            os.makedirs(UPLOAD_DIR, exist_ok=True)
            file_path = os.path.join(UPLOAD_DIR, filename)
            uploaded.save(file_path)
            # Explicit encoding: do not depend on the host's locale default.
            with open(file_path, 'r', encoding='utf-8') as f:
                code_input = f.read()
            parts = _parse_and_store(code_input)
        elif request.form.get('code', '').strip():
            code_input = request.form['code']
            filename = request.form.get('filename', 'unnamed.py') or 'unnamed.py'
            if not filename.endswith('.py'):
                filename += '.py'
            parts = _parse_and_store(code_input)
        elif request.form.get('query_ops', '').strip():
            # Handle query for operations
            operations = [op.strip() for op in request.form['query_ops'].split(',')]
            client = load_chromadb_from_hf(HF_DATASET_NAME, HF_TOKEN)
            query_results = query_programs(client, operations, DB_NAME)
            return render_template(
                'results_partial.html',
                parts=None,
                filename=filename,
                reconstructed_code=None,
                code_input=None,
                query_results=query_results,
            )

        if parts:
            # Templates receive 1-based indices alongside each part's fields.
            indexed_parts = [{'index': i, **part} for i, part in enumerate(parts, start=1)]
            return render_template(
                'results_partial.html',
                parts=indexed_parts,
                filename=filename,
                reconstructed_code=reconstruct_code(indexed_parts),
                code_input=code_input,
                query_results=None,
            )
        return 'No file, code, or query provided', 400

    # Initial page load
    client = load_chromadb_from_hf(HF_DATASET_NAME, HF_TOKEN)
    # Seed sample data unless the target collection already exists. Every
    # collection is checked, not just the first one returned.
    try:
        has_collection = any(
            collection.name == DB_NAME for collection in client.list_collections()
        )
    except Exception:
        # Best effort: if the collections cannot be inspected, fall back to
        # seeding, as before, but leave a trace of why.
        app.logger.exception("Could not inspect ChromaDB collections")
        has_collection = False
    if not has_collection:
        populate_sample_db(client)
    return render_template(
        'index.html',
        parts=None,
        filename=None,
        reconstructed_code=None,
        code_input=None,
        query_results=None,
    )
def export_json():
    """Return the posted parts as a downloadable ``code_vectors.json`` file.

    Expects a JSON object body with a ``parts`` list. Only the ``vector`` and
    ``source`` keys of each part are exported; a missing ``parts`` key yields
    an empty export.

    Returns:
        A ``send_file`` attachment response with mimetype
        ``application/json``, or a ``(json_error, 400)`` tuple when the body
        is not a JSON object or a part lacks ``vector``/``source``.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm it is registered with ``app`` for POST.
    """
    payload = request.get_json()
    if not isinstance(payload, dict):
        # e.g. a JSON null or list body: previously an AttributeError (500).
        return jsonify(error='Request body must be a JSON object'), 400

    parts = payload.get('parts', [])
    try:
        export_data = [
            {'vector': part['vector'], 'source': part['source']}
            for part in parts
        ]
    except (KeyError, TypeError):
        # Malformed client input is a 400, not an unhandled server error.
        return jsonify(error="Each part must be an object with 'vector' and 'source'"), 400

    json_str = json.dumps(export_data, indent=2)
    # A BytesIO built from initial bytes is already positioned at offset 0.
    buffer = io.BytesIO(json_str.encode('utf-8'))
    return send_file(
        buffer,
        as_attachment=True,
        download_name='code_vectors.json',
        mimetype='application/json',
    )
if __name__ == '__main__':
    # Make sure the upload directory exists before serving. exist_ok=True
    # avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    # Listen on all interfaces, port 7860.
    app.run(host="0.0.0.0", port=7860)