Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ index_name = "main" # Your index name
|
|
23 |
index = pc.Index(index_name)
|
24 |
|
25 |
def get_embedding(text):
|
26 |
-
response = client.embeddings.create(input=text, model="text-embedding-
|
27 |
return response.data[0].embedding
|
28 |
|
29 |
def process_pdf(file):
|
@@ -106,8 +106,7 @@ def process_upload(upload_type, file_or_link, file_name=None):
|
|
106 |
|
107 |
def process_chunk(chunk, doc_id, i, upload_type, doc_name):
|
108 |
embedding = get_embedding(chunk)
|
109 |
-
|
110 |
-
return (f"{doc_id}_{i}", truncated_embedding, {
|
111 |
"text": chunk,
|
112 |
"type": upload_type,
|
113 |
"doc_id": doc_id,
|
@@ -118,9 +117,8 @@ def process_chunk(chunk, doc_id, i, upload_type, doc_name):
|
|
118 |
def get_relevant_context(query, top_k=5):
|
119 |
print(f"Getting relevant context for query: {query}")
|
120 |
query_embedding = get_embedding(query)
|
121 |
-
truncated_query_embedding = query_embedding[:200]
|
122 |
|
123 |
-
search_results = index.query(vector=truncated_query_embedding, top_k=top_k, include_metadata=True)
|
124 |
print(f"Found {len(search_results['matches'])} relevant results")
|
125 |
|
126 |
# Sort results by doc_id and chunk_index to maintain document structure
|
|
|
23 |
index = pc.Index(index_name)
|
24 |
|
25 |
def get_embedding(text):
|
26 |
+
response = client.embeddings.create(input=text, model="text-embedding-3-large")
|
27 |
return response.data[0].embedding
|
28 |
|
29 |
def process_pdf(file):
|
|
|
106 |
|
107 |
def process_chunk(chunk, doc_id, i, upload_type, doc_name):
|
108 |
embedding = get_embedding(chunk)
|
109 |
+
return (f"{doc_id}_{i}", embedding, {
|
|
|
110 |
"text": chunk,
|
111 |
"type": upload_type,
|
112 |
"doc_id": doc_id,
|
|
|
117 |
def get_relevant_context(query, top_k=5):
|
118 |
print(f"Getting relevant context for query: {query}")
|
119 |
query_embedding = get_embedding(query)
|
|
|
120 |
|
121 |
+
search_results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
|
122 |
print(f"Found {len(search_results['matches'])} relevant results")
|
123 |
|
124 |
# Sort results by doc_id and chunk_index to maintain document structure
|