Update app.py
app.py CHANGED
@@ -8,11 +8,11 @@ from transformers import pipeline
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

-def extract_text(file_path):
-    if file_path.endswith(".pdf"):
-        return "\n".join([page.extract_text() or "" for page in PdfReader(file_path).pages])
-    elif file_path.endswith(".docx"):
-        return "\n".join([p.text for p in docx.Document(file_path).paragraphs])
+def extract_text(file):
+    if file.name.endswith(".pdf"):
+        return "\n".join([page.extract_text() or "" for page in PdfReader(file).pages])
+    elif file.name.endswith(".docx"):
+        return "\n".join([p.text for p in docx.Document(file).paragraphs])
     return ""

 def chunk_text(text, chunk_size=500):
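Note on the surrounding code, which this hunk does not show: it assumes PdfReader (from PyPDF2 or pypdf) and docx (python-docx) are imported near the top of app.py, and it cuts off right at the def line of chunk_text, whose body is unchanged. As a rough sketch only, one plausible implementation consistent with the buffer/append/return tail visible in the next hunk would be:

def chunk_text(text, chunk_size=500):
    # Hypothetical reconstruction, not the code in this commit: accumulate
    # words into a buffer until it reaches roughly chunk_size characters,
    # then flush the buffer as one chunk.
    chunks, buffer = [], ""
    for word in text.split():
        if buffer and len(buffer) + len(word) + 1 > chunk_size:
            chunks.append(buffer.strip())
            buffer = ""
        buffer += word + " "
    if buffer.strip():
        chunks.append(buffer.strip())
    return chunks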
@@ -28,11 +28,11 @@ def chunk_text(text, chunk_size=500):
         chunks.append(buffer.strip())
     return chunks

-def ask_question(file_path, question, history):
-    if not file_path:
+def ask_question(file, question, history):
+    if not file:
         return "Please upload a file.", history

-    text = extract_text(file_path)
+    text = extract_text(file)
     chunks = chunk_text(text)
     emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
     emb_question = embedder.encode(question, convert_to_tensor=True)
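The hunk ends at the question embedding; the unchanged lines between it and the return "", history in the next hunk are not shown. They presumably rank the chunks against the question and run the extractive QA model on the best matches. A sketch of that retrieval step under those assumptions (the helper name is hypothetical; qa_pipeline is the pipeline defined at the top of app.py):

from sentence_transformers import util

def answer_from_chunks(chunks, emb_chunks, emb_question, question, top_k=3):
    # Hypothetical helper: score each chunk against the question with cosine
    # similarity, join the top matches into one context, and let the QA
    # pipeline extract an answer span from it.
    scores = util.cos_sim(emb_question, emb_chunks)[0]
    top_idx = scores.topk(k=min(top_k, len(chunks))).indices.tolist()
    context = "\n".join(chunks[i] for i in top_idx)
    result = qa_pipeline(question=question, context=context)
    return result["answer"]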
@@ -46,26 +46,11 @@ def ask_question(file_path, question, history):
     return "", history

 with gr.Blocks() as demo:
-    gr.Markdown("##
-
-
-
-    with gr.Column():
-        gr.Markdown("### 🔽 Upload Your File")
-        file_input = gr.File(
-            label="Choose a PDF or Word file",
-            file_types=[".pdf", ".docx"],
-            type="filepath"  # ✅ Fixed for Hugging Face
-        )
-
-        chatbot = gr.Chatbot(label="💬 Chat with Document")
-        question = gr.Textbox(
-            label="Ask your question",
-            placeholder="Type your question here..."
-        )
+    gr.Markdown("## Document QA with Smart Retrieval")
+    file_input = gr.File(label="Upload PDF or Word", file_types=[".pdf", ".docx"])
+    chatbot = gr.Chatbot()
+    question = gr.Textbox(label="Ask your question")
     state = gr.State([])
-
-    # Submit by pressing Enter
     question.submit(ask_question, [file_input, question, state], [question, chatbot])

 demo.launch()
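One behavioural note on this hunk: the old layout passed type="filepath" to gr.File, so the callback received a plain path string, while the new gr.File call leaves the type at Gradio's default and extract_text now reads file.name. Which object actually reaches the callback depends on the installed Gradio version, so a defensive variant (hypothetical, not part of this commit) could accept either form:

def extract_text_any(file):
    # Hypothetical variant: handle both a bare path string and an upload
    # object exposing .name, whichever the installed Gradio version provides.
    path = file if isinstance(file, str) else file.name
    if path.endswith(".pdf"):
        return "\n".join(page.extract_text() or "" for page in PdfReader(path).pages)
    elif path.endswith(".docx"):
        return "\n".join(p.text for p in docx.Document(path).paragraphs)
    return ""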
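Finally, the question.submit wiring maps ask_question's two return values onto [question, chatbot]: the first value refills the question textbox (an empty string clears it, and the "Please upload a file." message would appear there), and the second becomes the Chatbot value, which in Gradio's classic tuples format is a list of (user, bot) message pairs. A minimal sketch of that contract, with a placeholder answer only:

def ask_question_contract(file, question, history):
    # Sketch of the return shape implied by outputs=[question, chatbot];
    # the answer string here is a placeholder, not the real QA output.
    answer = "example answer"
    history = history + [(question, answer)]  # one (user, bot) turn for the Chatbot
    return "", history                        # "" clears the textbox after submit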