chatPDF / app.py
dammy's picture
Rename requirements.txt to app.py
b116574
raw
history blame
589 Bytes
import gradio as gr
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
def extract_text(pdf_file):
    """Return the first text chunk extracted from the uploaded PDF.

    Args:
        pdf_file: The value supplied by the Gradio ``File`` input. It may be
            an object exposing the on-disk path as ``.name`` or a plain path
            string (which one depends on the component's ``type`` setting),
            or ``None`` when nothing has been uploaded.

    Returns:
        The ``page_content`` of the first chunk produced by the splitter, or
        an empty string when no file was provided or no text was extracted.
    """
    if pdf_file is None:
        return ""

    # Accept both upload shapes: a file-like wrapper carrying its path in
    # ``.name`` and a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    if not pdf_path:
        return ""

    # Load the uploaded document (not a fixed file name on disk).
    loader = PDFMinerLoader(pdf_path)
    documents = loader.load()

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # A PDF without extractable text yields no chunks; avoid IndexError.
    if not chunks:
        return ""
    return chunks[0].page_content
# Gradio UI: one PDF upload field wired to extract_text, with the result
# rendered as plain text.
upload_input = gr.File(type="file", label="Upload PDF")
iface = gr.Interface(fn=extract_text, inputs=upload_input, outputs="text")

# Start the web server for the interface.
iface.launch()