Spaces:
Running
Running
File size: 1,063 Bytes
07d0354 814c19e db576bd 07d0354 814c19e db576bd 07d0354 814c19e 07d0354 db576bd 814c19e db576bd 814c19e db576bd 814c19e 07d0354 7bd59d8 07d0354 814c19e 07d0354 814c19e 07d0354 814c19e 07d0354 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
import pdfplumber
from transformers import pipeline
# Build the NER pipeline once at module load; extract_info reuses it on
# every call instead of re-creating the model.
extractor = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
def extract_info(pdf_file):
    """Extract named entities from a PDF and group them by entity type.

    Args:
        pdf_file: The uploaded PDF, passed straight to ``pdfplumber.open``,
            or ``None`` when no file was provided.

    Returns:
        dict: Maps each ``entity_group`` label reported by the NER pipeline
        to the list of matched ``word`` values, in the order the pipeline
        returned them. Empty when no file was given or the PDF contains no
        extractable text.
    """
    # No upload (e.g. the form was submitted empty): nothing to analyse.
    if pdf_file is None:
        return {}

    with pdfplumber.open(pdf_file) as pdf:
        # Extract each page exactly once; the pages must be read while the
        # PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Pages without a text layer yield None / "" and are skipped.
    text = "\n".join(chunk for chunk in page_texts if chunk)
    if not text.strip():
        # Nothing for the model to look at; skip the inference call.
        return {}

    # Group the recognised words under their entity label.
    extracted_data = {}
    for entity in extractor(text):
        extracted_data.setdefault(entity["entity_group"], []).append(entity["word"])
    return extracted_data
# User interface for the Hugging Face Space: one PDF upload in, JSON out.
iface = gr.Interface(
    title="Ekstrakcja informacji z faktur PDF",
    description="Prześlij plik PDF z fakturą, a model rozpozna kluczowe informacje.",
    fn=extract_info,
    inputs=gr.File(label="Wybierz plik PDF"),
    outputs="json",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|