NaimaAqeel committed on
Commit
ea5b5c3
·
verified ·
1 Parent(s): ee5b33f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -27
app.py CHANGED
@@ -8,11 +8,11 @@ from transformers import pipeline
8
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
9
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
10
 
11
- def extract_text(file_path):
12
- if file_path.endswith(".pdf"):
13
- return "\n".join([page.extract_text() or "" for page in PdfReader(file_path).pages])
14
- elif file_path.endswith(".docx"):
15
- return "\n".join([p.text for p in docx.Document(file_path).paragraphs])
16
  return ""
17
 
18
  def chunk_text(text, chunk_size=500):
@@ -28,11 +28,11 @@ def chunk_text(text, chunk_size=500):
28
  chunks.append(buffer.strip())
29
  return chunks
30
 
31
- def ask_question(file_path, question, history):
32
- if not file_path:
33
  return "Please upload a file.", history
34
 
35
- text = extract_text(file_path)
36
  chunks = chunk_text(text)
37
  emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
38
  emb_question = embedder.encode(question, convert_to_tensor=True)
@@ -46,26 +46,11 @@ def ask_question(file_path, question, history):
46
  return "", history
47
 
48
  with gr.Blocks() as demo:
49
- gr.Markdown("## 📄 Document QA with Smart Retrieval")
50
-
51
- # File upload styled like a dialogue box
52
- with gr.Row():
53
- with gr.Column():
54
- gr.Markdown("### 🔽 Upload Your File")
55
- file_input = gr.File(
56
- label="Choose a PDF or Word file",
57
- file_types=[".pdf", ".docx"],
58
- type="filepath" # ✅ Fixed for Hugging Face
59
- )
60
-
61
- chatbot = gr.Chatbot(label="💬 Chat with Document")
62
- question = gr.Textbox(
63
- label="Ask your question",
64
- placeholder="Type your question here..."
65
- )
66
  state = gr.State([])
67
-
68
- # Submit by pressing Enter
69
  question.submit(ask_question, [file_input, question, state], [question, chatbot])
70
 
71
  demo.launch()
 
8
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
9
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
10
 
11
def extract_text(file):
    """Extract plain text from an uploaded PDF or Word (.docx) document.

    Parameters
    ----------
    file : str | os.PathLike | file-like
        Either a filesystem path or an upload object exposing a ``name``
        attribute (Gradio hands over one or the other depending on the
        ``gr.File(type=...)`` setting).

    Returns
    -------
    str
        The concatenated document text, or "" for unsupported file types.
    """
    # Resolve the filename whether we got a plain path string or a
    # file-like wrapper; the original `file.name` crashed on str paths.
    name = file if isinstance(file, str) else getattr(file, "name", "")
    if name.endswith(".pdf"):
        # page.extract_text() can return None on image-only pages; coerce to "".
        return "\n".join(page.extract_text() or "" for page in PdfReader(file).pages)
    elif name.endswith(".docx"):
        return "\n".join(p.text for p in docx.Document(file).paragraphs)
    return ""
17
 
18
  def chunk_text(text, chunk_size=500):
 
28
  chunks.append(buffer.strip())
29
  return chunks
30
 
31
+ def ask_question(file, question, history):
32
+ if not file:
33
  return "Please upload a file.", history
34
 
35
+ text = extract_text(file)
36
  chunks = chunk_text(text)
37
  emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
38
  emb_question = embedder.encode(question, convert_to_tensor=True)
 
46
  return "", history
47
 
48
# Assemble the Gradio UI: one file picker, a chat transcript, a question
# box, and hidden history state wired into ask_question on Enter.
with gr.Blocks() as demo:
    gr.Markdown("## Document QA with Smart Retrieval")

    # Restrict uploads to the two formats extract_text understands.
    file_input = gr.File(
        label="Upload PDF or Word",
        file_types=[".pdf", ".docx"],
    )
    chatbot = gr.Chatbot()
    question = gr.Textbox(label="Ask your question")

    # Conversation history persists across submissions via session state.
    state = gr.State([])

    # Pressing Enter in the textbox runs the QA pipeline; outputs clear
    # the textbox and refresh the chat transcript.
    question.submit(
        ask_question,
        [file_input, question, state],
        [question, chatbot],
    )

demo.launch()