thesnak committed on
Commit
bb13b3d
·
verified ·
1 Parent(s): 3e07fd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -19,22 +19,39 @@ text_chunks = []
19
  def extract_text_from_pdf(pdf_file):
20
  """Extract text from a PDF file."""
21
  text = ""
22
- with pdfplumber.open(pdf_file) as pdf:
23
- for page in pdf.pages:
24
- text += page.extract_text()
 
 
 
25
  return text
26
 
27
- def index_text_chunks(text):
28
  """Split text into chunks, generate embeddings, and index them."""
29
  global text_chunks, index
30
- text_chunks = text.split("\n\n") # Split by paragraphs
 
 
 
 
 
 
 
 
 
31
  embeddings = embedding_model.encode(text_chunks)
 
 
32
  index = faiss.IndexFlatL2(dimension)
33
  index.add(np.array(embeddings))
34
- return "Paper uploaded and indexed successfully!"
 
35
 
36
  def answer_question(question):
37
  """Retrieve relevant chunks and generate an answer."""
 
 
38
  if not text_chunks:
39
  return "Please upload a paper first."
40
 
@@ -56,7 +73,7 @@ with gr.Blocks() as demo:
56
  gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
57
 
58
  with gr.Row():
59
- pdf_input = gr.File(label="Upload PDF")
60
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
61
 
62
  with gr.Row():
 
19
  def extract_text_from_pdf(pdf_file):
20
  """Extract text from a PDF file."""
21
  text = ""
22
+ try:
23
+ with pdfplumber.open(pdf_file) as pdf:
24
+ for page in pdf.pages:
25
+ text += page.extract_text() or "" # Handle empty pages
26
+ except Exception as e:
27
+ return f"Error extracting text: {e}"
28
  return text
29
 
30
def index_text_chunks(pdf_file):
    """Extract text from a PDF, split it into chunks, embed and index them.

    On success the module-level ``text_chunks`` and ``index`` are replaced
    together; on any early return they keep their previous values, so the
    two globals never describe different documents.

    Args:
        pdf_file: The uploaded PDF, passed straight to
            ``extract_text_from_pdf``.

    Returns:
        A human-readable status message for the upload status box.
    """
    global text_chunks, index

    # Extract text from the uploaded PDF
    text = extract_text_from_pdf(pdf_file)

    # The extractor may report a failure as a message string; surface it to
    # the user instead of embedding and indexing the error text.
    if text and text.startswith("Error extracting text:"):
        return text

    # Split text into paragraphs, dropping blank ones
    chunks = [chunk for chunk in (text or "").split("\n\n") if chunk.strip()]
    if not chunks:
        # Covers both empty and whitespace-only text; an empty chunk list
        # cannot be embedded or added to the index.
        return "No text extracted from the PDF. Please upload a valid PDF file."

    # Generate embeddings for the chunks; FAISS expects a float32 matrix
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

    # Build the FAISS index in a local first so a failure here leaves the
    # previously indexed paper intact
    new_index = faiss.IndexFlatL2(dimension)
    new_index.add(embeddings)

    # Publish chunks and index together
    text_chunks, index = chunks, new_index

    return f"Paper uploaded and indexed successfully! Found {len(text_chunks)} chunks."
50
 
51
  def answer_question(question):
52
  """Retrieve relevant chunks and generate an answer."""
53
+ global text_chunks, index
54
+
55
  if not text_chunks:
56
  return "Please upload a paper first."
57
 
 
73
  gr.Markdown("Upload a PDF of your research paper and ask questions about it.")
74
 
75
  with gr.Row():
76
+ pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
77
  upload_status = gr.Textbox(label="Upload Status", interactive=False)
78
 
79
  with gr.Row():