Spaces:
Running
Running
from collections import defaultdict

import gradio as gr
import pdfplumber
from transformers import pipeline
# Initialise the NER model (done once, at module import).
extractor = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
def extract_info(pdf_file):
    """Extract named entities from an uploaded PDF and group them by label.

    Args:
        pdf_file: The uploaded PDF as handed over by the Gradio file input;
            it is passed straight to ``pdfplumber.open``. ``None`` means
            nothing was uploaded.

    Returns:
        A dict mapping each ``entity_group`` reported by the NER pipeline to
        the list of matched ``word`` values, in the order the pipeline
        returned them. The dict is empty when no file was given or when the
        PDF yields no extractable text.
    """
    # Nothing uploaded: report "no entities" instead of failing in pdfplumber.
    if pdf_file is None:
        return {}

    with pdfplumber.open(pdf_file) as pdf:
        # Extract each page exactly once; the same result serves both the
        # "has text" filter and the join below.
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    # PDFs without a text layer (e.g. scans) give nothing to analyse, so
    # skip the model call entirely.
    if not text.strip():
        return {}

    # Run the NLP model and group the recognised words by entity label.
    grouped = defaultdict(list)
    for entity in extractor(text):
        grouped[entity["entity_group"]].append(entity["word"])

    # Hand back a plain dict so the return type stays what callers had before.
    return dict(grouped)
# User interface for the Hugging Face Space.
iface = gr.Interface(
    fn=extract_info,
    inputs=gr.File(label="Wybierz plik PDF"),
    outputs="json",
    title="Ekstrakcja informacji z faktur PDF",
    description="Prześlij plik PDF z fakturą, a model rozpozna kluczowe informacje.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()