Spaces:
Build error
Build error
import gradio as gr | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever | |
import logging | |
# Single module-level store; pdf_to_document_store() and summarize() below
# read from and write to this same instance.
document_store = InMemoryDocumentStore()

# Cleaning and splitting options handed to the PreProcessor. Going by the
# argument names, text is split on words into pieces of 100 with an overlap
# of 3 while respecting sentence boundaries -- see the Haystack PreProcessor
# documentation for the exact semantics of each flag.
_PREPROCESSOR_SETTINGS = {
    "clean_empty_lines": True,
    "clean_whitespace": True,
    "clean_header_footer": True,
    "split_by": "word",
    "split_length": 100,
    "split_respect_sentence_boundary": True,
    "split_overlap": 3,
}
preprocessor = PreProcessor(**_PREPROCESSOR_SETTINGS)
def pdf_to_document_store(pdf_files):
    """Replace the document store's contents with the uploaded PDF(s).

    The store is always emptied first, so after this call it only holds
    documents derived from ``pdf_files``.

    Args:
        pdf_files: One upload or a list/tuple of uploads. Each upload is
            either a filesystem path given as ``str`` or an object exposing
            the path through a ``.name`` attribute (as the original code
            assumed). ``None`` or an empty list leaves the store empty.

    Returns:
        None. The result is observable through the module-level
        ``document_store``.
    """
    # The interface is declared with inputs="files" and advertises "one or
    # more PDFs", so the handler can receive a list; the previous code read
    # ``.name`` on the list itself and failed. Normalise to a list here.
    if pdf_files is None:
        uploads = []
    elif isinstance(pdf_files, (list, tuple)):
        uploads = list(pdf_files)
    else:
        uploads = [pdf_files]

    document_store.delete_documents()
    if not uploads:
        return None

    converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
    documents = []
    for upload in uploads:
        # Plain string paths are used as-is; anything else is expected to
        # carry its path in ``.name`` (unchanged from the original code).
        file_path = upload if isinstance(upload, str) else upload.name
        # As before, only the first document returned per file is kept.
        documents.append(converter.convert(file_path=file_path, meta=None)[0])

    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)
    return None
def summarize(files):
    """Index the uploaded file(s) and return the store's document count.

    Despite the name, no summary is produced here: the upload is passed to
    ``pdf_to_document_store`` and the number reported by
    ``document_store.get_document_count()`` is returned.
    """
    # Debug output: a marker line, then the runtime type of the upload.
    print('Got files', type(files), sep='\n')
    pdf_to_document_store(files)
    doc_count = document_store.get_document_count()
    return doc_count
title = "Summarize one or more PDFs with a Haystack Summariser pipeline"

# NOTE: a commented-out experiment with an explicit
# gr.inputs.File(file_count="multiple", type="file", label="Upload a pdf")
# input (plus debug prints) used to sit here; the "files" string shortcut
# below is what is actually passed to the interface.
iface = gr.Interface(
    fn=summarize,
    inputs="files",
    outputs="text",
    title=title,
    theme="default",
)

# Starts the app as a module-level side effect, exactly as before.
iface.launch()