# Source: parse_py / app.py (broadfield-dev) — commit 470905d "Update app.py", 6.2 kB
# app.py
from flask import Flask, request, render_template, jsonify, send_file
from parser import parse_python_code
import os
import json
import io
import sqlite3
from database import init_db, populate_sample_db
# Flask application object; the @app.route decorators in this module register
# their view functions on it, and the __main__ guard runs it.
app = Flask(__name__)
def reconstruct_code(parts):
    """Rebuild source text by concatenating parsed parts in start-line order.

    Each part is a mapping with a ``'location'`` entry whose first element is
    the part's start line, and a ``'source'`` entry holding its text. Parts
    that share a start line keep the relative order they had in ``parts``.
    The input collection is not modified.
    """
    def start_line(part):
        return part['location'][0]

    # Sort a copy so the caller's collection is left untouched; list.sort is
    # stable, so ties keep their incoming order.
    ordered = list(parts)
    ordered.sort(key=start_line)

    chunks = [part['source'] for part in ordered]
    return ''.join(chunks)
def is_subsequence(subseq, seq):
    """Return True when the items of ``subseq`` occur in ``seq`` in the same order.

    The items do not need to be adjacent in ``seq``. An empty ``subseq`` is a
    subsequence of anything.
    """
    # One shared iterator: every membership test consumes ``seq`` up to and
    # including the match, so the next item must be found strictly after it.
    remaining = iter(seq)
    for wanted in subseq:
        if wanted not in remaining:
            return False
    return True
def query_programs(operations):
    """Return the stored programs whose operation sequence contains ``operations``.

    Every row of the ``programs`` table in ``python_programs.db`` is checked:
    a program matches when ``operations`` is an in-order (not necessarily
    contiguous) subsequence of its comma-separated ``sequence`` column.
    Matches are ranked by the cosine similarity between the element-wise mean
    of the program's stored vectors and the element-wise mean of the vectors
    built for the queried operations.

    Args:
        operations: List of operation/category names to look for, in order.

    Returns:
        At most five dicts with the keys ``'id'``, ``'code'`` and
        ``'similarity'``, ordered by descending similarity.

    Raises:
        ValueError, SyntaxError: If the ``vectors`` column of a matching row
            is not a plain literal (list of lists of numbers).
    """
    import ast
    from contextlib import closing

    vector_size = 6  # length of the vectors returned by create_vector

    # The query vector does not depend on the row, so build it once.
    if operations:
        query_vector = np.mean(
            [create_vector(op, 0, (1, 1), 100, []) for op in operations],
            axis=0,
        )
    else:
        query_vector = np.zeros(vector_size)

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect('python_programs.db')) as conn:
        rows = conn.execute(
            "SELECT id, code, sequence, vectors FROM programs"
        ).fetchall()

    results = []
    for program_id, code, sequence_str, vectors_str in rows:
        if not is_subsequence(operations, sequence_str.split(',')):
            continue

        # SECURITY: this column used to be passed to eval(), which would run
        # arbitrary code stored in the database. literal_eval only accepts
        # literals; storing and loading the column as JSON would be better.
        vectors = ast.literal_eval(vectors_str)

        # Element-wise mean of the part vectors (the previous
        # ``sum(vectors, []) / len(vectors)`` divided a list by an int and
        # raised TypeError for every program that had vectors).
        if vectors:
            program_vector = np.mean(vectors, axis=0)
        else:
            program_vector = np.zeros(vector_size)

        similarity = float(
            cosine_similarity([program_vector], [query_vector])[0][0]
        )
        results.append(
            {'id': program_id, 'code': code, 'similarity': similarity}
        )

    # Top 5 matches, best first.
    return sorted(results, key=lambda r: r['similarity'], reverse=True)[:5]
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def create_vector(category, level, location, total_lines, parent_path):
    """Build the six-element feature vector describing one code part.

    Intended to mirror the parser's own ``create_vector`` so that query
    vectors are comparable with stored ones.

    Args:
        category: Category name of the part; unknown names map to id 0.
        level: Nesting level, copied into the vector unchanged.
        location: ``(start_line, end_line)`` pair.
        total_lines: Line count used to normalise position and span.
        parent_path: Sequence of parent labels such as ``'Function[1]'``;
            the text before the first ``'['`` is lower-cased and looked up
            as a category name.

    Returns:
        ``[category_id, level, center_pos, span, parent_depth, parent_weight]``.
    """
    # A name's position in this tuple (counting from 1) is its numeric id,
    # so the order must not change.
    category_names = (
        'import', 'function', 'async_function', 'class',
        'if', 'while', 'for', 'try', 'expression', 'spacer',
        'other', 'elif', 'else', 'except', 'finally', 'return',
        'assigned_variable', 'input_variable', 'returned_variable',
    )
    category_ids = {
        name: number for number, name in enumerate(category_names, start=1)
    }

    first_line, last_line = location
    span = (last_line - first_line + 1) / total_lines
    center_pos = ((first_line + last_line) / 2) / total_lines

    # Each ancestor contributes its category id scaled by 1/rank, so entries
    # earlier in parent_path weigh more.
    weighted_ancestors = sum(
        category_ids.get(label.split('[')[0].lower(), 0) * (1 / rank)
        for rank, label in enumerate(parent_path, start=1)
    )
    parent_weight = weighted_ancestors / max(1, len(category_ids))

    return [
        category_ids.get(category, 0),
        level,
        center_pos,
        span,
        len(parent_path),
        parent_weight,
    ]
def _parse_and_store(code_input):
    """Parse ``code_input``, store it with its sequence and vectors, return the parts."""
    # Imported lazily, exactly as the view did before this helper existed.
    from database import store_program

    parts, sequence = parse_python_code(code_input)
    vectors = [part['vector'] for part in parts]
    store_program(code_input, sequence, vectors)
    return parts


@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the main page and handle its three kinds of form submission.

    GET initialises the database, populates the sample data and renders
    ``index.html``.

    POST handles the first of these that is present, in this order:

    * an uploaded ``file`` whose name ends in ``.py``: saved under
      ``uploads/``, parsed and stored;
    * pasted ``code`` (optionally with a ``filename``): parsed and stored;
    * ``query_ops``, a comma-separated operation list: looked up with
      ``query_programs``.

    Returns:
        The rendered ``results_partial.html`` fragment on success, or a
        ``(message, 400)`` tuple when the upload is not a ``.py`` file or
        when the request yields nothing to show.
    """
    if request.method != 'POST':
        # Initial page load.
        init_db()  # Ensure database is initialized
        populate_sample_db()  # Populate with sample data
        return render_template(
            'index.html',
            parts=None,
            filename=None,
            reconstructed_code=None,
            code_input=None,
            query_results=None,
        )

    parts = None
    filename = 'unnamed.py'
    code_input = None

    upload = request.files.get('file')
    if upload is not None and upload.filename:
        if not upload.filename.endswith('.py'):
            return 'Invalid file type. Please upload a Python file.', 400

        # The name comes from the client: keep only its last path component
        # (treating backslashes as separators too) so that something like
        # '../../x.py' cannot be written outside the uploads directory.
        filename = os.path.basename(upload.filename.replace('\\', '/'))

        # The directory is otherwise only created when the module is run as
        # a script, so make sure it exists under any server.
        os.makedirs('uploads', exist_ok=True)
        file_path = os.path.join('uploads', filename)
        upload.save(file_path)

        # Python source defaults to UTF-8; do not rely on the platform's
        # default text encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            code_input = f.read()
        parts = _parse_and_store(code_input)
    elif request.form.get('code', '').strip():
        code_input = request.form['code']
        filename = request.form.get('filename', 'unnamed.py') or 'unnamed.py'
        if not filename.endswith('.py'):
            filename += '.py'
        parts = _parse_and_store(code_input)
    elif request.form.get('query_ops', '').strip():
        # Handle query for operations.
        operations = [op.strip() for op in request.form['query_ops'].split(',')]
        return render_template(
            'results_partial.html',
            parts=None,
            filename=filename,
            reconstructed_code=None,
            code_input=None,
            query_results=query_programs(operations),
        )

    if not parts:
        return 'No file, code, or query provided', 400

    indexed_parts = [{'index': i + 1, **part} for i, part in enumerate(parts)]
    return render_template(
        'results_partial.html',
        parts=indexed_parts,
        filename=filename,
        reconstructed_code=reconstruct_code(indexed_parts),
        code_input=code_input,
        query_results=None,
    )
@app.route('/export_json', methods=['POST'])
def export_json():
    """Return the posted parts as a downloadable ``code_vectors.json`` file.

    Expects a JSON object body with an optional ``parts`` list. Only the
    ``vector`` and ``source`` entries of each part are exported.

    Returns:
        The JSON attachment, or a ``(message, 400)`` tuple when the body is
        not a JSON object or a part lacks ``vector``/``source``.
    """
    # silent=True yields None for a missing or invalid JSON body instead of
    # raising, so all malformed input is rejected in one place below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return 'Request body must be a JSON object', 400

    parts = payload.get('parts', [])
    try:
        export_data = [
            {'vector': part['vector'], 'source': part['source']}
            for part in parts
        ]
    except (KeyError, TypeError):
        # Previously a malformed part surfaced as an unhandled exception.
        return "Each part must be an object with 'vector' and 'source'", 400

    json_str = json.dumps(export_data, indent=2)
    # A freshly constructed BytesIO is already positioned at offset 0.
    buffer = io.BytesIO(json_str.encode('utf-8'))
    return send_file(
        buffer,
        as_attachment=True,
        download_name='code_vectors.json',
        mimetype='application/json'
    )
if __name__ == '__main__':
    # The index view saves uploaded files under ./uploads. exist_ok=True
    # creates the directory only when needed, without a separate existence
    # check that another process could race against.
    os.makedirs('uploads', exist_ok=True)
    app.run(port=7860)