poemsforaphrodite committed on
Commit
8ae9422
·
verified ·
1 Parent(s): 7a55ced

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -5
app.py CHANGED
@@ -23,7 +23,7 @@ index_name = "main" # Your index name
23
  index = pc.Index(index_name)
24
 
25
  def get_embedding(text):
26
- response = client.embeddings.create(input=text, model="text-embedding-ada-002")
27
  return response.data[0].embedding
28
 
29
  def process_pdf(file):
@@ -106,8 +106,7 @@ def process_upload(upload_type, file_or_link, file_name=None):
106
 
107
  def process_chunk(chunk, doc_id, i, upload_type, doc_name):
108
  embedding = get_embedding(chunk)
109
- truncated_embedding = embedding[:200]
110
- return (f"{doc_id}_{i}", truncated_embedding, {
111
  "text": chunk,
112
  "type": upload_type,
113
  "doc_id": doc_id,
@@ -118,9 +117,8 @@ def process_chunk(chunk, doc_id, i, upload_type, doc_name):
118
  def get_relevant_context(query, top_k=5):
119
  print(f"Getting relevant context for query: {query}")
120
  query_embedding = get_embedding(query)
121
- truncated_query_embedding = query_embedding[:200]
122
 
123
- search_results = index.query(vector=truncated_query_embedding, top_k=top_k, include_metadata=True)
124
  print(f"Found {len(search_results['matches'])} relevant results")
125
 
126
  # Sort results by doc_id and chunk_index to maintain document structure
 
23
  index = pc.Index(index_name)
24
 
25
  def get_embedding(text):
26
+ response = client.embeddings.create(input=text, model="text-embedding-3-large")
27
  return response.data[0].embedding
28
 
29
  def process_pdf(file):
 
106
 
107
  def process_chunk(chunk, doc_id, i, upload_type, doc_name):
108
  embedding = get_embedding(chunk)
109
+ return (f"{doc_id}_{i}", embedding, {
 
110
  "text": chunk,
111
  "type": upload_type,
112
  "doc_id": doc_id,
 
117
  def get_relevant_context(query, top_k=5):
118
  print(f"Getting relevant context for query: {query}")
119
  query_embedding = get_embedding(query)
 
120
 
121
+ search_results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
122
  print(f"Found {len(search_results['matches'])} relevant results")
123
 
124
  # Sort results by doc_id and chunk_index to maintain document structure