broadfield-dev committed on
Commit
04dce48
·
verified ·
1 Parent(s): ae9ce58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -1
app.py CHANGED
@@ -5,7 +5,7 @@ import os
5
  import json
6
  import io
7
  import subprocess # To call process_hf_dataset.py
8
- from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME
9
  import logging
10
 
11
  # Set up logging
@@ -46,6 +46,10 @@ def index():
46
  vectors = [part['vector'] for part in parts]
47
  store_program(client, code_input, sequence, vectors, DB_NAME)
48
  logger.info(f"Stored code: {filename}")
 
 
 
 
49
  except Exception as e:
50
  logger.error(f"Error storing code {filename}: {e}")
51
  return f"Error storing code: {e}", 500
@@ -60,6 +64,10 @@ def index():
60
  vectors = [part['vector'] for part in parts]
61
  store_program(client, code_input, sequence, vectors, DB_NAME)
62
  logger.info(f"Stored code: {filename}")
 
 
 
 
63
  except Exception as e:
64
  logger.error(f"Error storing code {filename}: {e}")
65
  return f"Error storing code: {e}", 500
@@ -70,6 +78,8 @@ def index():
70
  client = load_chromadb_from_hf()
71
  query_results = query_programs(client, operations, DB_NAME)
72
  logger.info(f"Queried operations: {operations}")
 
 
73
  return render_template(
74
  'results_partial.html',
75
  parts=None,
@@ -88,6 +98,8 @@ def index():
88
  client = load_chromadb_from_hf()
89
  query_results = query_programs(client, None, DB_NAME, semantic_query=semantic_query)
90
  logger.info(f"Queried semantically: {semantic_query}")
 
 
91
  return render_template(
92
  'results_partial.html',
93
  parts=None,
@@ -111,12 +123,20 @@ def index():
111
  logger.warning(f"Failed to delete collection {DB_NAME}: {e}")
112
  collection = client.create_collection(DB_NAME)
113
  logger.info(f"Created fresh ChromaDB collection: {DB_NAME}")
 
 
 
 
114
 
115
  # Process dataset
116
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
117
  logger.info(f"Process Hugging Face dataset output: {result.stdout}")
118
  if result.stderr:
119
  logger.error(f"Process Hugging Face dataset errors: {result.stderr}")
 
 
 
 
120
  return render_template(
121
  'results_partial.html',
122
  parts=None,
@@ -139,12 +159,20 @@ def index():
139
  client = init_chromadb()
140
  collection = client.get_or_create_collection(DB_NAME)
141
  logger.info(f"Using existing or new ChromaDB collection: {DB_NAME}")
 
 
 
 
142
 
143
  # Process dataset
144
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
145
  logger.info(f"Load Hugging Face dataset output: {result.stdout}")
146
  if result.stderr:
147
  logger.error(f"Load Hugging Face dataset errors: {result.stderr}")
 
 
 
 
148
  return render_template(
149
  'results_partial.html',
150
  parts=None,
@@ -175,6 +203,9 @@ def index():
175
  if collection is None or not hasattr(collection, 'add'):
176
  raise ValueError("ChromaDB collection creation failed")
177
  logger.info("Verified ChromaDB collection is valid")
 
 
 
178
  return render_template(
179
  'results_partial.html',
180
  parts=None,
 
5
  import json
6
  import io
7
  import subprocess # To call process_hf_dataset.py
8
+ from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection
9
  import logging
10
 
11
  # Set up logging
 
46
  vectors = [part['vector'] for part in parts]
47
  store_program(client, code_input, sequence, vectors, DB_NAME)
48
  logger.info(f"Stored code: {filename}")
49
+ # Verify storage
50
+ collection = create_collection(client, DB_NAME)
51
+ count = collection.count()
52
+ logger.info(f"ChromaDB now contains {count} entries")
53
  except Exception as e:
54
  logger.error(f"Error storing code {filename}: {e}")
55
  return f"Error storing code: {e}", 500
 
64
  vectors = [part['vector'] for part in parts]
65
  store_program(client, code_input, sequence, vectors, DB_NAME)
66
  logger.info(f"Stored code: {filename}")
67
+ # Verify storage
68
+ collection = create_collection(client, DB_NAME)
69
+ count = collection.count()
70
+ logger.info(f"ChromaDB now contains {count} entries")
71
  except Exception as e:
72
  logger.error(f"Error storing code {filename}: {e}")
73
  return f"Error storing code: {e}", 500
 
78
  client = load_chromadb_from_hf()
79
  query_results = query_programs(client, operations, DB_NAME)
80
  logger.info(f"Queried operations: {operations}")
81
+ # Verify query results
82
+ logger.info(f"Found {len(query_results)} matching programs in ChromaDB")
83
  return render_template(
84
  'results_partial.html',
85
  parts=None,
 
98
  client = load_chromadb_from_hf()
99
  query_results = query_programs(client, None, DB_NAME, semantic_query=semantic_query)
100
  logger.info(f"Queried semantically: {semantic_query}")
101
+ # Verify query results
102
+ logger.info(f"Found {len(query_results)} matching programs in ChromaDB")
103
  return render_template(
104
  'results_partial.html',
105
  parts=None,
 
123
  logger.warning(f"Failed to delete collection {DB_NAME}: {e}")
124
  collection = client.create_collection(DB_NAME)
125
  logger.info(f"Created fresh ChromaDB collection: {DB_NAME}")
126
+ # Verify collection
127
+ if collection is None or not hasattr(collection, 'add'):
128
+ raise ValueError("ChromaDB collection creation failed")
129
+ logger.info("Verified ChromaDB collection is valid")
130
 
131
  # Process dataset
132
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
133
  logger.info(f"Process Hugging Face dataset output: {result.stdout}")
134
  if result.stderr:
135
  logger.error(f"Process Hugging Face dataset errors: {result.stderr}")
136
+ # Verify database population
137
+ collection = create_collection(client, DB_NAME)
138
+ count = collection.count()
139
+ logger.info(f"ChromaDB now contains {count} entries after processing")
140
  return render_template(
141
  'results_partial.html',
142
  parts=None,
 
159
  client = init_chromadb()
160
  collection = client.get_or_create_collection(DB_NAME)
161
  logger.info(f"Using existing or new ChromaDB collection: {DB_NAME}")
162
+ # Verify collection
163
+ if collection is None or not hasattr(collection, 'add'):
164
+ raise ValueError("ChromaDB collection access failed")
165
+ logger.info("Verified ChromaDB collection is valid")
166
 
167
  # Process dataset
168
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
169
  logger.info(f"Load Hugging Face dataset output: {result.stdout}")
170
  if result.stderr:
171
  logger.error(f"Load Hugging Face dataset errors: {result.stderr}")
172
+ # Verify database population
173
+ collection = create_collection(client, DB_NAME)
174
+ count = collection.count()
175
+ logger.info(f"ChromaDB now contains {count} entries after loading")
176
  return render_template(
177
  'results_partial.html',
178
  parts=None,
 
203
  if collection is None or not hasattr(collection, 'add'):
204
  raise ValueError("ChromaDB collection creation failed")
205
  logger.info("Verified ChromaDB collection is valid")
206
+ # Verify collection is empty
207
+ count = collection.count()
208
+ logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
209
  return render_template(
210
  'results_partial.html',
211
  parts=None,