File size: 1,822 Bytes
ef0dde7
ffaaa3a
 
 
ef0dde7
 
 
1e615dd
ef0dde7
1e615dd
ef0dde7
 
252803a
1e615dd
ef0dde7
1e615dd
ef0dde7
 
 
1e615dd
ef0dde7
a5b6905
 
ef0dde7
a5b6905
1e615dd
ef0dde7
 
 
 
 
 
1e615dd
ef0dde7
1e615dd
a5b6905
ef0dde7
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceLLM
from langchain.chains import MapReduceChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
import os

# Load the embedding model and the LLM once at import time so that every
# request handled by the app reuses the same instances.
# The target device goes through ``model_kwargs`` (forwarded to the underlying
# sentence-transformers model); HuggingFaceEmbeddings has no top-level
# ``device`` argument and rejects unknown fields.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
# NOTE(review): langchain_community.llms does not appear to export a
# ``HuggingFaceLLM`` class (HuggingFacePipeline / HuggingFaceEndpoint are the
# usual wrappers) -- confirm the import and the constructor arguments.
llm = HuggingFaceLLM(model_name="meta-llama/Meta-Llama-3.1-8B-Instruct")

def process_pdf_and_summarize(file):
    """Summarize an uploaded PDF with a map-reduce LLM chain.

    The PDF is loaded page by page, split into overlapping chunks, each chunk
    is summarized on its own (map step) and the partial summaries are then
    merged into one final summary (reduce step).

    Args:
        file: The upload handed over by the UI. Either an object exposing the
            file path as ``.name`` or a plain path string is accepted;
            ``None`` means nothing was uploaded.

    Returns:
        The generated summary, or a human-readable message when no file was
        supplied, no text could be extracted, or processing failed. Failures
        are returned as text rather than raised so they appear in the output
        box of the interface.
    """
    if file is None:
        return "Please upload a PDF file to summarize."

    try:
        # Local import: the summarization factory is not among the file-level
        # imports, and ``MapReduceChain`` offers no ``from_chain_type``.
        from langchain.chains.summarize import load_summarize_chain

        # Uploads may arrive as a temp-file wrapper (``.name``) or as a path.
        pdf_path = getattr(file, "name", file)
        documents = PyPDFLoader(pdf_path).load()

        # Split the pages so each map call stays within the model's context.
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(documents)
        if not chunks:
            return "Error processing PDF: no extractable text was found."

        # Summarize the document
        map_template = """Summarize the following text:\n\n{text}\n\nSummary:"""
        map_prompt = PromptTemplate.from_template(map_template)
        reduce_template = """Combine these summaries into a final summary:\n\nSummaries: {doc_summaries}\n\nFinal Summary:"""
        reduce_prompt = PromptTemplate.from_template(reduce_template)

        chain = load_summarize_chain(
            llm,
            chain_type="map_reduce",
            map_prompt=map_prompt,
            combine_prompt=reduce_prompt,
            # The reduce prompt names its input ``doc_summaries`` instead of
            # the default ``text``, so the chain must be told about it.
            combine_document_variable_name="doc_summaries",
        )
        result = chain.invoke({"input_documents": chunks})
        return result["output_text"]
    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of
        # crashing the request.
        return f"Error processing PDF: {str(e)}"

# Gradio interface: a single PDF upload in, the summary text out.
interface = gr.Interface(
    fn=process_pdf_and_summarize,
    # ``gr.inputs`` is the legacy component namespace (removed in Gradio 4);
    # components are exposed at the top level. The picker is limited to PDFs
    # because that is the only format the handler can load.
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs="text",
    title="PDF Summarizer",
    description="Upload a PDF document to generate a summary."
)

# Start the web server for the app.
interface.launch()