Update app.py
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import asyncio
 import gradio as gr
 from langchain_core.prompts import PromptTemplate
-from …
+from langchain_core.documents import Document
 from langchain_google_genai import ChatGoogleGenerativeAI
 import google.generativeai as genai
 from langchain.chains.question_answering import load_qa_chain
@@ -31,21 +31,25 @@ def load_mistral_model():
     mistral_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
 
 @lru_cache(maxsize=100)
-def …
+def get_pdf_content(file_path):
     doc = pymupdf.open(file_path)
-    …
-    for …
-    …
-    …
+    content = []
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        text = page.get_text()
+        content.append(Document(page_content=text, metadata={"page": page_num + 1}))
+    return content
 
 async def process_pdf(file_path, question):
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
     prompt_template = """Answer the question as precise as possible using the provided context. If the answer is not contained in the context, say "answer not available in context" \n\n Context: \n {context}?\n Question: \n {question} \n Answer: """
     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
+    pdf_content = get_pdf_content(file_path)
+    context = "\n".join([doc.page_content for doc in pdf_content[:5]])  # Limit to first 5 pages for efficiency
+
     stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-    stuff_answer = await stuff_chain.arun({"input_documents": […
+    stuff_answer = await stuff_chain.arun({"input_documents": pdf_content[:5], "question": question, "context": context})
     return stuff_answer
 
 async def process_image(image, question):
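
For anyone wanting to smoke-test the new PDF path outside the Gradio UI, a minimal driver could look like the sketch below. It is not part of this commit: the file name and question are placeholders, and it assumes app.py exposes process_pdf on import and that GOOGLE_API_KEY is set in the environment for ChatGoogleGenerativeAI.

import asyncio

from app import process_pdf  # hypothetical import of the changed module

# Hypothetical smoke test, not part of this change.
# Assumes GOOGLE_API_KEY is exported and "sample.pdf" exists locally.
answer = asyncio.run(process_pdf("sample.pdf", "What is this document about?"))
print(answer)

One design note: because get_pdf_content is memoized with @lru_cache(maxsize=100), repeated questions against the same file_path reuse the parsed pages; the cache keys on the path string, so a PDF edited in place would keep serving stale text until the process restarts.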