Bofandra committed on
Commit
2828f4b
·
verified ·
1 Parent(s): 8aea9a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -39,12 +39,18 @@ def save_pdf(file, title):
39
  # Extract text
40
  reader = PdfReader(file.name)
41
  full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
 
42
 
43
  # Chunk text
44
  chunks = [full_text[i:i+500] for i in range(0, len(full_text), 500)]
45
 
46
  # Embed and index
47
  embeddings = embedder.encode(chunks)
 
 
 
 
 
48
  index = faiss.IndexFlatL2(embeddings.shape[1])
49
  index.add(embeddings)
50
 
@@ -59,7 +65,7 @@ def save_pdf(file, title):
59
  upload_to_hub(index_path, f"data/{title}/index.faiss")
60
  upload_to_hub(chunks_path, f"data/{title}/chunks.pkl")
61
 
62
- return f"✅ Saved and indexed '{title}', and uploaded to Hub."
63
 
64
  # Return all available PDF titles
65
  def list_titles():
 
39
  # Extract text
40
  reader = PdfReader(file.name)
41
  full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
42
+ print(full_text)
43
 
44
  # Chunk text
45
  chunks = [full_text[i:i+500] for i in range(0, len(full_text), 500)]
46
 
47
  # Embed and index
48
  embeddings = embedder.encode(chunks)
49
+
50
+ print("Embeddings shape:", embeddings.shape)
51
+ if len(embeddings.shape) != 2:
52
+ raise ValueError(f"Expected 2D embeddings, got shape {embeddings.shape}")
53
+
54
  index = faiss.IndexFlatL2(embeddings.shape[1])
55
  index.add(embeddings)
56
 
 
65
  upload_to_hub(index_path, f"data/{title}/index.faiss")
66
  upload_to_hub(chunks_path, f"data/{title}/chunks.pkl")
67
 
68
+ return f"✅ Saved and indexed '{title}', and uploaded to Hub. Please reload (refresh) the page."
69
 
70
  # Return all available PDF titles
71
  def list_titles():