Spaces:
Build error
Build error
File size: 1,375 Bytes
f3a61e0 a3fdd99 c3572c4 a3fdd99 f3a61e0 819ac67 84af4a0 a6970fe f3a61e0 3986348 0389e9a 3986348 f3a61e0 fc419f1 f3a61e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import gradio as gr
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
import logging
# Module-level store shared by every request handled by this app.
document_store = InMemoryDocumentStore()

# Cleans converted text and splits it by word count before indexing.
# The values below ask for 100-word splits with a 3-word overlap that
# respect sentence boundaries.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
    split_overlap=3,
)
def pdf_to_document_store(pdf_files):
    """Replace the document store's contents with text from uploaded PDFs.

    Args:
        pdf_files: A single upload or a list/tuple of uploads. Each upload
            is either an object exposing the file path as ``.name`` (as the
            original code assumed) or a plain path. ``None`` is treated as
            "no files".

    Returns:
        None. The module-level ``document_store`` is cleared and then
        filled with the preprocessed documents.
    """
    # The interface is declared with inputs="files", so a list of uploads
    # is the expected shape; a single upload is still accepted.
    if pdf_files is None:
        uploads = []
    elif isinstance(pdf_files, (list, tuple)):
        uploads = list(pdf_files)
    else:
        uploads = [pdf_files]

    # Always start from an empty store so earlier uploads do not leak
    # into the current request.
    document_store.delete_documents()
    if not uploads:
        return

    converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["en"],
    )
    # convert() returns a list; only its first element is kept, as before.
    documents = [
        converter.convert(file_path=getattr(upload, "name", upload), meta=None)[0]
        for upload in uploads
    ]
    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)
def summarize(files):
    """Index the uploaded file(s) and return the stored document count.

    Despite its name, this callback produces no summary: it hands
    ``files`` to ``pdf_to_document_store`` and reports how many documents
    the store holds afterwards.
    """
    # Debug trace of what the UI passed in.
    print('Got files', type(files), sep='\n')
    pdf_to_document_store(files)
    stored_count = document_store.get_document_count()
    return stored_count
title = "Summarize one or more PDFs with a Haystack Summariser pipeline"

# NOTE: an explicit gr.inputs.File(file_count="multiple", type="file",
# label="Upload a pdf") component was tried here earlier; the "files"
# string shortcut is used for the input instead.
iface = gr.Interface(
    fn=summarize,
    inputs="files",
    outputs="text",
    title=title,
    theme="default",
)

# Start the web UI as soon as the script is executed.
iface.launch()
|