Roberta2024 committed
Commit 022296a (verified) · 1 Parent(s): fd07a91

Update app.py

Files changed (1):
  1. app.py +12 -8
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import asyncio
 import gradio as gr
 from langchain_core.prompts import PromptTemplate
-from langchain_community.document_loaders import PyPDFLoader
+from langchain_core.documents import Document
 from langchain_google_genai import ChatGoogleGenerativeAI
 import google.generativeai as genai
 from langchain.chains.question_answering import load_qa_chain
@@ -31,21 +31,25 @@ def load_mistral_model():
     mistral_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
 
 @lru_cache(maxsize=100)
-def get_pdf_context(file_path):
+def get_pdf_content(file_path):
     doc = pymupdf.open(file_path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text[:10000]  # Limit context to first 10000 characters
+    content = []
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        text = page.get_text()
+        content.append(Document(page_content=text, metadata={"page": page_num + 1}))
+    return content
 
 async def process_pdf(file_path, question):
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
     prompt_template = """Answer the question as precise as possible using the provided context. If the answer is not contained in the context, say "answer not available in context" \n\n Context: \n {context}?\n Question: \n {question} \n Answer: """
     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
-    context = get_pdf_context(file_path)
+    pdf_content = get_pdf_content(file_path)
+    context = "\n".join([doc.page_content for doc in pdf_content[:5]])  # Limit to first 5 pages for efficiency
+
     stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-    stuff_answer = await stuff_chain.arun({"input_documents": [context], "question": question, "context": context})
+    stuff_answer = await stuff_chain.arun({"input_documents": pdf_content[:5], "question": question, "context": context})
     return stuff_answer
 
 async def process_image(image, question):
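
For reference, a minimal standalone sketch of what this commit lands: PyMuPDF extracts text page by page, each page is wrapped in a LangChain Document (so the stuff chain receives real Document objects rather than the bare string the old `[context]` call passed), and only the first five pages feed the QA chain. Everything below follows the diff except the `answer_pdf` wrapper name and the example path/question, which are placeholders; a GOOGLE_API_KEY environment variable is assumed for the Gemini model.

import asyncio

import pymupdf  # PyMuPDF
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain

def get_pdf_content(file_path):
    # One Document per page; 1-based page number kept in metadata, as in the commit.
    doc = pymupdf.open(file_path)
    return [Document(page_content=doc[i].get_text(), metadata={"page": i + 1})
            for i in range(len(doc))]

async def answer_pdf(file_path, question):  # hypothetical wrapper, not in the commit
    pages = get_pdf_content(file_path)[:5]  # same five-page cap as the commit
    prompt = PromptTemplate(
        template=('Answer the question as precisely as possible using the provided '
                  'context. If the answer is not contained in the context, say '
                  '"answer not available in context"\n\n'
                  'Context:\n{context}\nQuestion:\n{question}\nAnswer:'),
        input_variables=["context", "question"],
    )
    chain = load_qa_chain(ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3),
                          chain_type="stuff", prompt=prompt)
    # The stuff chain joins the Documents into the {context} slot on its own, which
    # is why the commit's extra "context" key is effectively redundant: the chain
    # fills {context} from input_documents.
    return await chain.arun({"input_documents": pages, "question": question})

print(asyncio.run(answer_pdf("sample.pdf", "What is the title?")))  # placeholder inputs

Two side notes on the design: per-page Documents preserve page numbers in metadata, which the old raw string concatenation threw away, and `@lru_cache(maxsize=100)` still works on the renamed `get_pdf_content` because it keys on the hashable `file_path` string, so repeated questions against the same PDF skip re-parsing.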