import os

import gradio as gr
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS  # not used by summarization; kept for optional retrieval

# Load embeddings (not required for map-reduce summarization; kept for optional retrieval with FAISS)
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Llama 3.1 is a gated model: accept its license on Hugging Face and set
# HUGGINGFACEHUB_API_TOKEN in the environment before running.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
    max_new_tokens=512,
    temperature=0.1,
    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
)


def process_pdf_and_summarize(file):
    try:
        # Load the PDF into LangChain documents (one per page)
        loader = PyPDFLoader(file.name)
        documents = loader.load()

        # Split pages into overlapping chunks so each fits the model context
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        split_docs = text_splitter.split_documents(documents)

        # Map prompt: summarize each chunk individually
        map_template = """Summarize the following text:\n\n{text}\n\nSummary:"""
        map_prompt = PromptTemplate.from_template(map_template)

        # Combine prompt: merge the per-chunk summaries into a final summary
        combine_template = """Combine these summaries into a final summary:\n\n{text}\n\nFinal Summary:"""
        combine_prompt = PromptTemplate.from_template(combine_template)

        # Map-reduce summarization chain
        chain = load_summarize_chain(
            llm=llm,
            chain_type="map_reduce",
            map_prompt=map_prompt,
            combine_prompt=combine_prompt,
        )
        summary = chain.run(split_docs)
        return summary
    except Exception as e:
        return f"Error processing PDF: {str(e)}"


# Gradio interface
interface = gr.Interface(
    fn=process_pdf_and_summarize,
    inputs=gr.File(label="Upload PDF"),
    outputs="text",
    title="PDF Summarizer",
    description="Upload a PDF document to generate a summary.",
)

interface.launch()