kryman27 committed on
Commit
db576bd
verified
1 Parent(s): 814c19e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -2,25 +2,25 @@ import gradio as gr
2
  import pdfplumber
3
  from transformers import pipeline
4
 
5
- # Inicjalizacja modelu do ekstrakcji informacji
6
- extractor = pipeline("ner", model="dslim/bert-base-NER")
7
 
8
  def extract_info(pdf_file):
9
  with pdfplumber.open(pdf_file) as pdf:
10
- text = ""
11
- for page in pdf.pages:
12
- text += page.extract_text() + "\n"
13
 
14
  # Przetwarzanie tekstu modelem NLP
15
  entities = extractor(text)
16
 
17
- # Filtrowanie i formatowanie wyników
18
  extracted_data = {}
19
  for entity in entities:
20
- label = entity['entity']
21
- word = entity['word']
 
22
  if label not in extracted_data:
23
  extracted_data[label] = []
 
24
  extracted_data[label].append(word)
25
 
26
  return extracted_data
 
2
  import pdfplumber
3
  from transformers import pipeline
4
 
5
+ # Inicjalizacja modelu NER
6
+ extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
7
 
8
  def extract_info(pdf_file):
9
  with pdfplumber.open(pdf_file) as pdf:
10
+ text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
 
 
11
 
12
  # Przetwarzanie tekstu modelem NLP
13
  entities = extractor(text)
14
 
15
+ # Formatowanie wyników
16
  extracted_data = {}
17
  for entity in entities:
18
+ label = entity["entity_group"]
19
+ word = entity["word"]
20
+
21
  if label not in extracted_data:
22
  extracted_data[label] = []
23
+
24
  extracted_data[label].append(word)
25
 
26
  return extracted_data