NaimaAqeel committed on
Commit
ee5b33f
·
verified ·
1 Parent(s): 22de3ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -8,11 +8,11 @@ from transformers import pipeline
8
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
9
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
10
 
11
- def extract_text(file):
12
- if file.name.endswith(".pdf"):
13
- return "\n".join([page.extract_text() or "" for page in PdfReader(file).pages])
14
- elif file.name.endswith(".docx"):
15
- return "\n".join([p.text for p in docx.Document(file).paragraphs])
16
  return ""
17
 
18
  def chunk_text(text, chunk_size=500):
@@ -28,11 +28,11 @@ def chunk_text(text, chunk_size=500):
28
  chunks.append(buffer.strip())
29
  return chunks
30
 
31
- def ask_question(file, question, history):
32
- if not file:
33
  return "Please upload a file.", history
34
 
35
- text = extract_text(file)
36
  chunks = chunk_text(text)
37
  emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
38
  emb_question = embedder.encode(question, convert_to_tensor=True)
@@ -55,7 +55,7 @@ with gr.Blocks() as demo:
55
  file_input = gr.File(
56
  label="Choose a PDF or Word file",
57
  file_types=[".pdf", ".docx"],
58
- type="file"
59
  )
60
 
61
  chatbot = gr.Chatbot(label="💬 Chat with Document")
 
8
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
9
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
10
 
11
+ def extract_text(file_path):
12
+ if file_path.endswith(".pdf"):
13
+ return "\n".join([page.extract_text() or "" for page in PdfReader(file_path).pages])
14
+ elif file_path.endswith(".docx"):
15
+ return "\n".join([p.text for p in docx.Document(file_path).paragraphs])
16
  return ""
17
 
18
  def chunk_text(text, chunk_size=500):
 
28
  chunks.append(buffer.strip())
29
  return chunks
30
 
31
+ def ask_question(file_path, question, history):
32
+ if not file_path:
33
  return "Please upload a file.", history
34
 
35
+ text = extract_text(file_path)
36
  chunks = chunk_text(text)
37
  emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
38
  emb_question = embedder.encode(question, convert_to_tensor=True)
 
55
  file_input = gr.File(
56
  label="Choose a PDF or Word file",
57
  file_types=[".pdf", ".docx"],
58
+ type="filepath" # ✅ Fixed for Hugging Face
59
  )
60
 
61
  chatbot = gr.Chatbot(label="💬 Chat with Document")