import gradio as gr | |
from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
def extract_text(pdf_file):
    """Return the first text chunk extracted from an uploaded PDF.

    Args:
        pdf_file: The upload handed over by the Gradio ``File`` input: either
            a filesystem path string or a file-like object whose ``name``
            attribute holds the path of the uploaded file. ``None`` is
            accepted and treated as "nothing uploaded".

    Returns:
        The ``page_content`` of the first chunk produced by
        ``CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)``, or an
        empty string when there is no upload or the PDF yields no chunks.
    """
    if pdf_file is None:
        return ""

    # Read the file the user actually uploaded rather than a fixed
    # "cereal.pdf" in the working directory.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    documents = PDFMinerLoader(pdf_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    # A PDF with no extractable text produces no chunks; return an empty
    # result instead of failing with IndexError on chunks[0].
    if not chunks:
        return ""
    return chunks[0].page_content
# Wire extract_text into a simple web UI: one PDF upload widget as the input
# and a plain text box as the output, then start the app.
iface = gr.Interface(
    extract_text,
    gr.File(label="Upload PDF", type="file"),
    "text",
)
iface.launch()