Roberta2024 committed
Commit 022296a (verified) · 1 Parent(s): fd07a91

Update app.py

Files changed (1):
  1. app.py +12 -8
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import asyncio
 import gradio as gr
 from langchain_core.prompts import PromptTemplate
-from langchain_community.document_loaders import PyPDFLoader
+from langchain_core.documents import Document
 from langchain_google_genai import ChatGoogleGenerativeAI
 import google.generativeai as genai
 from langchain.chains.question_answering import load_qa_chain
@@ -31,21 +31,25 @@ def load_mistral_model():
     mistral_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
 
 @lru_cache(maxsize=100)
-def get_pdf_context(file_path):
+def get_pdf_content(file_path):
     doc = pymupdf.open(file_path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text[:10000]  # Limit context to first 10000 characters
+    content = []
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        text = page.get_text()
+        content.append(Document(page_content=text, metadata={"page": page_num + 1}))
+    return content
 
 async def process_pdf(file_path, question):
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
     prompt_template = """Answer the question as precise as possible using the provided context. If the answer is not contained in the context, say "answer not available in context" \n\n Context: \n {context}?\n Question: \n {question} \n Answer: """
     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
-    context = get_pdf_context(file_path)
+    pdf_content = get_pdf_content(file_path)
+    context = "\n".join([doc.page_content for doc in pdf_content[:5]])  # Limit to first 5 pages for efficiency
+
     stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-    stuff_answer = await stuff_chain.arun({"input_documents": [context], "question": question, "context": context})
+    stuff_answer = await stuff_chain.arun({"input_documents": pdf_content[:5], "question": question, "context": context})
     return stuff_answer
 
 async def process_image(image, question):
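
For reference, a minimal standalone sketch of what this commit lands: PyMuPDF extracts text page by page, each page is wrapped in a LangChain Document (so the stuff chain receives real Document objects rather than the bare string the old `[context]` call passed), and only the first five pages feed the QA chain. Everything below follows the diff except the `answer_pdf` wrapper name and the example path/question, which are placeholders; a GOOGLE_API_KEY environment variable is assumed for the Gemini model.

import asyncio

import pymupdf  # PyMuPDF
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain

def get_pdf_content(file_path):
    # One Document per page; 1-based page number kept in metadata, as in the commit.
    doc = pymupdf.open(file_path)
    return [Document(page_content=doc[i].get_text(), metadata={"page": i + 1})
            for i in range(len(doc))]

async def answer_pdf(file_path, question):  # hypothetical wrapper, not in the commit
    pages = get_pdf_content(file_path)[:5]  # same five-page cap as the commit
    prompt = PromptTemplate(
        template=('Answer the question as precisely as possible using the provided '
                  'context. If the answer is not contained in the context, say '
                  '"answer not available in context"\n\n'
                  'Context:\n{context}\nQuestion:\n{question}\nAnswer:'),
        input_variables=["context", "question"],
    )
    chain = load_qa_chain(ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3),
                          chain_type="stuff", prompt=prompt)
    # The stuff chain joins the Documents into the {context} slot on its own, which
    # is why the commit's extra "context" key is effectively redundant: the chain
    # fills {context} from input_documents.
    return await chain.arun({"input_documents": pages, "question": question})

print(asyncio.run(answer_pdf("sample.pdf", "What is the title?")))  # placeholder inputs

Two side notes on the design: per-page Documents preserve page numbers in metadata, which the old raw string concatenation threw away, and `@lru_cache(maxsize=100)` still works on the renamed `get_pdf_content` because it keys on the hashable `file_path` string, so repeated questions against the same PDF skip re-parsing.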