Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import os
|
|
5 |
import json
|
6 |
import io
|
7 |
import subprocess # To call process_hf_dataset.py
|
8 |
-
from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME
|
9 |
import logging
|
10 |
|
11 |
# Set up logging
|
@@ -46,6 +46,10 @@ def index():
|
|
46 |
vectors = [part['vector'] for part in parts]
|
47 |
store_program(client, code_input, sequence, vectors, DB_NAME)
|
48 |
logger.info(f"Stored code: {filename}")
|
|
|
|
|
|
|
|
|
49 |
except Exception as e:
|
50 |
logger.error(f"Error storing code {filename}: {e}")
|
51 |
return f"Error storing code: {e}", 500
|
@@ -60,6 +64,10 @@ def index():
|
|
60 |
vectors = [part['vector'] for part in parts]
|
61 |
store_program(client, code_input, sequence, vectors, DB_NAME)
|
62 |
logger.info(f"Stored code: {filename}")
|
|
|
|
|
|
|
|
|
63 |
except Exception as e:
|
64 |
logger.error(f"Error storing code {filename}: {e}")
|
65 |
return f"Error storing code: {e}", 500
|
@@ -70,6 +78,8 @@ def index():
|
|
70 |
client = load_chromadb_from_hf()
|
71 |
query_results = query_programs(client, operations, DB_NAME)
|
72 |
logger.info(f"Queried operations: {operations}")
|
|
|
|
|
73 |
return render_template(
|
74 |
'results_partial.html',
|
75 |
parts=None,
|
@@ -88,6 +98,8 @@ def index():
|
|
88 |
client = load_chromadb_from_hf()
|
89 |
query_results = query_programs(client, None, DB_NAME, semantic_query=semantic_query)
|
90 |
logger.info(f"Queried semantically: {semantic_query}")
|
|
|
|
|
91 |
return render_template(
|
92 |
'results_partial.html',
|
93 |
parts=None,
|
@@ -111,12 +123,20 @@ def index():
|
|
111 |
logger.warning(f"Failed to delete collection {DB_NAME}: {e}")
|
112 |
collection = client.create_collection(DB_NAME)
|
113 |
logger.info(f"Created fresh ChromaDB collection: {DB_NAME}")
|
|
|
|
|
|
|
|
|
114 |
|
115 |
# Process dataset
|
116 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
117 |
logger.info(f"Process Hugging Face dataset output: {result.stdout}")
|
118 |
if result.stderr:
|
119 |
logger.error(f"Process Hugging Face dataset errors: {result.stderr}")
|
|
|
|
|
|
|
|
|
120 |
return render_template(
|
121 |
'results_partial.html',
|
122 |
parts=None,
|
@@ -139,12 +159,20 @@ def index():
|
|
139 |
client = init_chromadb()
|
140 |
collection = client.get_or_create_collection(DB_NAME)
|
141 |
logger.info(f"Using existing or new ChromaDB collection: {DB_NAME}")
|
|
|
|
|
|
|
|
|
142 |
|
143 |
# Process dataset
|
144 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
145 |
logger.info(f"Load Hugging Face dataset output: {result.stdout}")
|
146 |
if result.stderr:
|
147 |
logger.error(f"Load Hugging Face dataset errors: {result.stderr}")
|
|
|
|
|
|
|
|
|
148 |
return render_template(
|
149 |
'results_partial.html',
|
150 |
parts=None,
|
@@ -175,6 +203,9 @@ def index():
|
|
175 |
if collection is None or not hasattr(collection, 'add'):
|
176 |
raise ValueError("ChromaDB collection creation failed")
|
177 |
logger.info("Verified ChromaDB collection is valid")
|
|
|
|
|
|
|
178 |
return render_template(
|
179 |
'results_partial.html',
|
180 |
parts=None,
|
|
|
5 |
import json
|
6 |
import io
|
7 |
import subprocess # To call process_hf_dataset.py
|
8 |
+
from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection
|
9 |
import logging
|
10 |
|
11 |
# Set up logging
|
|
|
46 |
vectors = [part['vector'] for part in parts]
|
47 |
store_program(client, code_input, sequence, vectors, DB_NAME)
|
48 |
logger.info(f"Stored code: {filename}")
|
49 |
+
# Verify storage
|
50 |
+
collection = create_collection(client, DB_NAME)
|
51 |
+
count = collection.count()
|
52 |
+
logger.info(f"ChromaDB now contains {count} entries")
|
53 |
except Exception as e:
|
54 |
logger.error(f"Error storing code {filename}: {e}")
|
55 |
return f"Error storing code: {e}", 500
|
|
|
64 |
vectors = [part['vector'] for part in parts]
|
65 |
store_program(client, code_input, sequence, vectors, DB_NAME)
|
66 |
logger.info(f"Stored code: {filename}")
|
67 |
+
# Verify storage
|
68 |
+
collection = create_collection(client, DB_NAME)
|
69 |
+
count = collection.count()
|
70 |
+
logger.info(f"ChromaDB now contains {count} entries")
|
71 |
except Exception as e:
|
72 |
logger.error(f"Error storing code {filename}: {e}")
|
73 |
return f"Error storing code: {e}", 500
|
|
|
78 |
client = load_chromadb_from_hf()
|
79 |
query_results = query_programs(client, operations, DB_NAME)
|
80 |
logger.info(f"Queried operations: {operations}")
|
81 |
+
# Verify query results
|
82 |
+
logger.info(f"Found {len(query_results)} matching programs in ChromaDB")
|
83 |
return render_template(
|
84 |
'results_partial.html',
|
85 |
parts=None,
|
|
|
98 |
client = load_chromadb_from_hf()
|
99 |
query_results = query_programs(client, None, DB_NAME, semantic_query=semantic_query)
|
100 |
logger.info(f"Queried semantically: {semantic_query}")
|
101 |
+
# Verify query results
|
102 |
+
logger.info(f"Found {len(query_results)} matching programs in ChromaDB")
|
103 |
return render_template(
|
104 |
'results_partial.html',
|
105 |
parts=None,
|
|
|
123 |
logger.warning(f"Failed to delete collection {DB_NAME}: {e}")
|
124 |
collection = client.create_collection(DB_NAME)
|
125 |
logger.info(f"Created fresh ChromaDB collection: {DB_NAME}")
|
126 |
+
# Verify collection
|
127 |
+
if collection is None or not hasattr(collection, 'add'):
|
128 |
+
raise ValueError("ChromaDB collection creation failed")
|
129 |
+
logger.info("Verified ChromaDB collection is valid")
|
130 |
|
131 |
# Process dataset
|
132 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
133 |
logger.info(f"Process Hugging Face dataset output: {result.stdout}")
|
134 |
if result.stderr:
|
135 |
logger.error(f"Process Hugging Face dataset errors: {result.stderr}")
|
136 |
+
# Verify database population
|
137 |
+
collection = create_collection(client, DB_NAME)
|
138 |
+
count = collection.count()
|
139 |
+
logger.info(f"ChromaDB now contains {count} entries after processing")
|
140 |
return render_template(
|
141 |
'results_partial.html',
|
142 |
parts=None,
|
|
|
159 |
client = init_chromadb()
|
160 |
collection = client.get_or_create_collection(DB_NAME)
|
161 |
logger.info(f"Using existing or new ChromaDB collection: {DB_NAME}")
|
162 |
+
# Verify collection
|
163 |
+
if collection is None or not hasattr(collection, 'add'):
|
164 |
+
raise ValueError("ChromaDB collection access failed")
|
165 |
+
logger.info("Verified ChromaDB collection is valid")
|
166 |
|
167 |
# Process dataset
|
168 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
169 |
logger.info(f"Load Hugging Face dataset output: {result.stdout}")
|
170 |
if result.stderr:
|
171 |
logger.error(f"Load Hugging Face dataset errors: {result.stderr}")
|
172 |
+
# Verify database population
|
173 |
+
collection = create_collection(client, DB_NAME)
|
174 |
+
count = collection.count()
|
175 |
+
logger.info(f"ChromaDB now contains {count} entries after loading")
|
176 |
return render_template(
|
177 |
'results_partial.html',
|
178 |
parts=None,
|
|
|
203 |
if collection is None or not hasattr(collection, 'add'):
|
204 |
raise ValueError("ChromaDB collection creation failed")
|
205 |
logger.info("Verified ChromaDB collection is valid")
|
206 |
+
# Verify collection is empty
|
207 |
+
count = collection.count()
|
208 |
+
logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
|
209 |
return render_template(
|
210 |
'results_partial.html',
|
211 |
parts=None,
|