tomsoderlund's picture
tokenizer from KBLab example
9cb5903
raw
history blame
813 Bytes
import gradio as gr
from transformers import pipeline
import json
# Models from https://huggingface.co/models
# https://huggingface.co/KBLab/bert-base-swedish-cased-ner
_NER_MODEL_NAME = 'KBLab/bert-base-swedish-cased-ner'

# Shared NER pipeline, created on first use so the model is not rebuilt
# for every request handled by process_swedish_text.
_ner_pipeline = None


def _get_ner_pipeline():
    """Return the shared NER pipeline, constructing it on the first call."""
    global _ner_pipeline
    if _ner_pipeline is None:
        _ner_pipeline = pipeline('ner', model=_NER_MODEL_NAME, tokenizer=_NER_MODEL_NAME)
    return _ner_pipeline


def process_swedish_text(text):
    """Run named-entity recognition on ``text`` and return the result as JSON.

    Args:
        text: The input string handed to the NER pipeline.

    Returns:
        A JSON string of the form ``{"entities": [...]}``, where each element
        is one entry produced by the pipeline with its ``'score'`` converted
        to a built-in ``float``.
    """
    nlp = _get_ner_pipeline()

    # Run NER
    pipeline_results = nlp(text)
    print('NER results:', pipeline_results)

    # Fix TypeError("'numpy.float32' object is not iterable"): the score is
    # cast to a plain float so that json.dumps can serialize it.
    # Built eagerly as a list so the debug print below shows the actual
    # entities rather than a lazy iterator object.
    entities = [
        {**entity, 'score': float(entity['score'])}
        for entity in pipeline_results
    ]
    print(entities)

    # Return values
    return json.dumps({'entities': entities})
# Expose process_swedish_text through a Gradio UI: one text input, JSON output.
gradio_interface = gr.Interface(
    fn=process_swedish_text,
    inputs="text",
    outputs="json",
)

# Start serving the interface.
gradio_interface.launch()