Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,8 +5,9 @@ import os
|
|
5 |
import json
|
6 |
import io
|
7 |
import subprocess # To call process_hf_dataset.py
|
8 |
-
from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection
|
9 |
import logging
|
|
|
10 |
|
11 |
# Set up logging
|
12 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -127,6 +128,25 @@ def index():
|
|
127 |
if collection is None or not hasattr(collection, 'add'):
|
128 |
raise ValueError("ChromaDB collection creation failed")
|
129 |
logger.info("Verified ChromaDB collection is valid")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
# Process dataset
|
132 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
@@ -144,7 +164,7 @@ def index():
|
|
144 |
reconstructed_code=None,
|
145 |
code_input=None,
|
146 |
query_results=None,
|
147 |
-
message="Hugging Face dataset processed and stored successfully with fresh database."
|
148 |
)
|
149 |
except subprocess.CalledProcessError as e:
|
150 |
logger.error(f"Error processing Hugging Face dataset: {e.stderr}")
|
@@ -163,6 +183,9 @@ def index():
|
|
163 |
if collection is None or not hasattr(collection, 'add'):
|
164 |
raise ValueError("ChromaDB collection access failed")
|
165 |
logger.info("Verified ChromaDB collection is valid")
|
|
|
|
|
|
|
166 |
|
167 |
# Process dataset
|
168 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
@@ -189,7 +212,7 @@ def index():
|
|
189 |
logger.error(f"Unexpected error loading Hugging Face dataset: {e}")
|
190 |
return f"Unexpected error loading Hugging Face dataset: {e}", 500
|
191 |
elif 'reset_db' in request.form:
|
192 |
-
# Reset ChromaDB collection (no repopulation with samples)
|
193 |
try:
|
194 |
client = init_chromadb()
|
195 |
try:
|
@@ -206,6 +229,23 @@ def index():
|
|
206 |
# Verify collection is empty
|
207 |
count = collection.count()
|
208 |
logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
return render_template(
|
210 |
'results_partial.html',
|
211 |
parts=None,
|
@@ -213,7 +253,7 @@ def index():
|
|
213 |
reconstructed_code=None,
|
214 |
code_input=None,
|
215 |
query_results=None,
|
216 |
-
message="Database reset successfully."
|
217 |
)
|
218 |
except Exception as e:
|
219 |
logger.error(f"Error resetting database: {e}")
|
|
|
5 |
import json
|
6 |
import io
|
7 |
import subprocess # To call process_hf_dataset.py
|
8 |
+
from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection, save_chromadb_to_hf
|
9 |
import logging
|
10 |
+
from datasets import Dataset
|
11 |
|
12 |
# Set up logging
|
13 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
128 |
if collection is None or not hasattr(collection, 'add'):
|
129 |
raise ValueError("ChromaDB collection creation failed")
|
130 |
logger.info("Verified ChromaDB collection is valid")
|
131 |
+
# Verify collection is empty
|
132 |
+
count = collection.count()
|
133 |
+
logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
|
134 |
+
|
135 |
+
# Reset Hugging Face dataset (replace with empty dataset)
|
136 |
+
try:
|
137 |
+
empty_data = {
|
138 |
+
"code": [],
|
139 |
+
"sequence": [],
|
140 |
+
"vectors": [],
|
141 |
+
"description_tokens": [],
|
142 |
+
"program_vectors": []
|
143 |
+
}
|
144 |
+
empty_dataset = Dataset.from_dict(empty_data)
|
145 |
+
empty_dataset.push_to_hub(HF_DATASET_NAME, token=os.getenv("HF_KEY"))
|
146 |
+
logger.info(f"Replaced Hugging Face dataset {HF_DATASET_NAME} with empty dataset")
|
147 |
+
except Exception as e:
|
148 |
+
logger.error(f"Error replacing Hugging Face dataset: {e}")
|
149 |
+
raise
|
150 |
|
151 |
# Process dataset
|
152 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
|
|
164 |
reconstructed_code=None,
|
165 |
code_input=None,
|
166 |
query_results=None,
|
167 |
+
message="Hugging Face dataset processed and stored successfully with fresh database and empty dataset."
|
168 |
)
|
169 |
except subprocess.CalledProcessError as e:
|
170 |
logger.error(f"Error processing Hugging Face dataset: {e.stderr}")
|
|
|
183 |
if collection is None or not hasattr(collection, 'add'):
|
184 |
raise ValueError("ChromaDB collection access failed")
|
185 |
logger.info("Verified ChromaDB collection is valid")
|
186 |
+
# Verify collection state
|
187 |
+
count = collection.count()
|
188 |
+
logger.info(f"ChromaDB contains {count} entries before loading")
|
189 |
|
190 |
# Process dataset
|
191 |
result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
|
|
|
212 |
logger.error(f"Unexpected error loading Hugging Face dataset: {e}")
|
213 |
return f"Unexpected error loading Hugging Face dataset: {e}", 500
|
214 |
elif 'reset_db' in request.form:
|
215 |
+
# Reset ChromaDB collection and Hugging Face dataset (no repopulation with samples)
|
216 |
try:
|
217 |
client = init_chromadb()
|
218 |
try:
|
|
|
229 |
# Verify collection is empty
|
230 |
count = collection.count()
|
231 |
logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
|
232 |
+
|
233 |
+
# Reset Hugging Face dataset (replace with empty dataset)
|
234 |
+
try:
|
235 |
+
empty_data = {
|
236 |
+
"code": [],
|
237 |
+
"sequence": [],
|
238 |
+
"vectors": [],
|
239 |
+
"description_tokens": [],
|
240 |
+
"program_vectors": []
|
241 |
+
}
|
242 |
+
empty_dataset = Dataset.from_dict(empty_data)
|
243 |
+
empty_dataset.push_to_hub(HF_DATASET_NAME, token=os.getenv("HF_KEY"))
|
244 |
+
logger.info(f"Replaced Hugging Face dataset {HF_DATASET_NAME} with empty dataset")
|
245 |
+
except Exception as e:
|
246 |
+
logger.error(f"Error replacing Hugging Face dataset: {e}")
|
247 |
+
raise
|
248 |
+
|
249 |
return render_template(
|
250 |
'results_partial.html',
|
251 |
parts=None,
|
|
|
253 |
reconstructed_code=None,
|
254 |
code_input=None,
|
255 |
query_results=None,
|
256 |
+
message="Database and Hugging Face dataset reset successfully."
|
257 |
)
|
258 |
except Exception as e:
|
259 |
logger.error(f"Error resetting database: {e}")
|