Penality committed on
Commit
be1a3b5
·
verified ·
1 Parent(s): 024816a

Update app.py

Browse files

Moved metadata and FAISS indexing to the Flask backend

Files changed (1) hide show
  1. app.py +8 -34
app.py CHANGED
@@ -32,45 +32,19 @@ embedding_dim = 768 # Adjust according to model
32
  def store_document_data(PDF_FILE, METADATA_FILE, INDEX_FILE):
33
  print(" Storing document...")
34
 
35
- if PDF_FILE:
36
  # Extract text from the PDF
37
  text = extract_text_from_pdf(PDF_FILE)
38
  if not text:
39
  return "Could not extract any text from the PDF."
40
-
41
- if METADATA_FILE:
42
- # extract metadata
43
- print(" Metadata file exists")
44
- with open(METADATA_FILE, "r") as f:
45
- metadata = json.load(f)
46
  else:
47
- print("metadata_file is empty")
48
- metadata = {}
49
-
50
- if INDEX_FILE:
51
- # extract Faiss
52
- print("index_file recieved")
53
- index = faiss.read_index(INDEX_FILE)
54
- else:
55
- print(" No FAISS index found. Creating a new one.")
56
- index = faiss.IndexFlatL2(embedding_dim) # Empty FAISS index
57
-
58
- # Generate and store embedding
59
- embedding = embedding_model.encode([text]).astype(np.float32)
60
- index.add(embedding) # Add to FAISS index
61
- print(" Embeddings generated")
62
-
63
- # Get FAISS index for the new document
64
- doc_index = index.ntotal - 1
65
-
66
- # Update metadata with FAISS index
67
- metadata[str(doc_index)] = PDF_FILE
68
- print(" Saved Metadata")
69
-
70
- return json.dumps({
71
- "metadata": metadata,
72
- "index": index
73
- })
74
 
75
  def retrieve_document(query):
76
  print(f"Retrieving document based on:\n{query}")
 
32
  def store_document_data(PDF_FILE, METADATA_FILE, INDEX_FILE):
33
  print(" Storing document...")
34
 
35
+ if PDF_FILE:
36
  # Extract text from the PDF
37
  text = extract_text_from_pdf(PDF_FILE)
38
  if not text:
39
  return "Could not extract any text from the PDF."
40
+
41
+ # Generate and return embedding
42
+ embedding = embedding_model.encode([text]).astype(np.float32)
43
+ print("Embeddings generated")
44
+
45
+ return embedding
46
  else:
47
+ return "No PDF file provided."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def retrieve_document(query):
50
  print(f"Retrieving document based on:\n{query}")