NaimaAqeel committed on
Commit
cd89674
·
verified ·
1 Parent(s): e264586

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -76,7 +76,8 @@ else:
76
  def upload_files(files):
77
  global index, document_texts
78
  try:
79
- for file_path in files:
 
80
  if file_path.endswith('.pdf'):
81
  text = extract_text_from_pdf(file_path)
82
  elif file_path.endswith('.docx'):
@@ -84,9 +85,12 @@ def upload_files(files):
84
  else:
85
  return "Unsupported file format"
86
 
 
 
87
  # Process the text and update FAISS index
88
  sentences = text.split("\n")
89
  embeddings = embedding_model.encode(sentences)
 
90
  index.add(np.array(embeddings))
91
  document_texts.append(text)
92
 
@@ -105,11 +109,15 @@ def upload_files(files):
105
 
106
  def query_text(text):
107
  try:
 
 
108
  # Encode the query text
109
  query_embedding = embedding_model.encode([text])
 
110
 
111
  # Search the FAISS index
112
  D, I = index.search(np.array(query_embedding), k=5)
 
113
 
114
  top_documents = []
115
  for idx in I[0]:
@@ -148,6 +156,7 @@ demo.launch()
148
 
149
 
150
 
 
151
 
152
 
153
 
 
76
  def upload_files(files):
77
  global index, document_texts
78
  try:
79
+ for file in files:
80
+ file_path = file.name # Get the file path from the NamedString object
81
  if file_path.endswith('.pdf'):
82
  text = extract_text_from_pdf(file_path)
83
  elif file_path.endswith('.docx'):
 
85
  else:
86
  return "Unsupported file format"
87
 
88
+ print(f"Extracted text: {text[:100]}...") # Debug: Show the first 100 characters of the extracted text
89
+
90
  # Process the text and update FAISS index
91
  sentences = text.split("\n")
92
  embeddings = embedding_model.encode(sentences)
93
+ print(f"Embeddings shape: {embeddings.shape}") # Debug: Show the shape of the embeddings
94
  index.add(np.array(embeddings))
95
  document_texts.append(text)
96
 
 
109
 
110
  def query_text(text):
111
  try:
112
+ print(f"Query text: {text}") # Debug: Show the query text
113
+
114
  # Encode the query text
115
  query_embedding = embedding_model.encode([text])
116
+ print(f"Query embedding shape: {query_embedding.shape}") # Debug: Show the shape of the query embedding
117
 
118
  # Search the FAISS index
119
  D, I = index.search(np.array(query_embedding), k=5)
120
+ print(f"Distances: {D}, Indices: {I}") # Debug: Show the distances and indices of the search results
121
 
122
  top_documents = []
123
  for idx in I[0]:
 
156
 
157
 
158
 
159
+
160
 
161
 
162