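"""Gradio Space app: upload a PDF and generate a map-reduce summary with LangChain."""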
import os

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS  # available for retrieval extensions; not used by the summary chain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
# Embedding model (kept for optional vector-store use) and the summarization LLM;
# HuggingFaceEndpoint calls the hosted Inference API and assumes a Hugging Face
# token available as the HF_TOKEN environment variable / Space secret.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
llm = HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct", huggingfacehub_api_token=os.environ.get("HF_TOKEN"))
def process_pdf_and_summarize(file):
    try:
        # Load the PDF document
        loader = PyPDFLoader(file.name)
        documents = loader.load()

        # Split the pages into overlapping chunks for the map step
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = splitter.split_documents(documents)

        # Map prompt summarizes each chunk; combine prompt merges the chunk summaries
        map_template = """Summarize the following text:\n\n{text}\n\nSummary:"""
        map_prompt = PromptTemplate.from_template(map_template)
        combine_template = """Combine these summaries into a final summary:\n\n{text}\n\nFinal Summary:"""
        combine_prompt = PromptTemplate.from_template(combine_template)

        # Map-reduce summarization chain
        chain = load_summarize_chain(
            llm=llm,
            chain_type="map_reduce",
            map_prompt=map_prompt,
            combine_prompt=combine_prompt,
        )
        summary = chain.run(docs)
        return summary
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
# Gradio interface
interface = gr.Interface(
    fn=process_pdf_and_summarize,
    inputs=gr.File(label="Upload PDF"),
    outputs="text",
    title="PDF Summarizer",
    description="Upload a PDF document to generate a summary.",
)

interface.launch()
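# Assumed Space dependencies (requirements.txt): gradio, langchain, langchain-community,
# pypdf, sentence-transformers, faiss-cpu, huggingface_hub.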