File size: 2,495 Bytes
f95924f
 
 
0b05d6d
f95924f
0b05d6d
f95924f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from transformers import AutoModelForTokenClassification,AutoTokenizer,pipeline
import gradio as gr
import torch 
# Load the token-classification checkpoint and its tokenizer by hub name.
# NOTE(review): earlier experiments passed options such as
# `local_files_only=True` / `cache_dir=...` to load from a local cache;
# they are left disabled here.
tokenizer = AutoTokenizer.from_pretrained(
    'uer/roberta-base-finetuned-cluener2020-chinese'
)
model = AutoModelForTokenClassification.from_pretrained(
    'uer/roberta-base-finetuned-cluener2020-chinese'
)

# Token-level NER pipeline used by `ner` below.
ner_pipeline = pipeline('ner', model=model, tokenizer=tokenizer)

# Sample inputs offered in the demo UI. The second example is a single long
# string assembled from adjacent literals (no separators are inserted).
examples = [
    "江苏警方通报特斯拉冲进店铺",
    (
        "李沐,深度学习专家。"
        "李沐于2004年进入上海交通大学计算机科学与工程系进行本科学习;2009年至2010年担任香港科技大学研究助理;2011年至2012年担任百度高级研究员;2012年至2017年在美国卡内基梅隆大学攻读博士学位。2019年编著的《动手学深度学习》出版。"
        "李沐专注于分布式系统和机器学习算法的研究。"
    ),
]

def ner(text):
    """Tag named entities in ``text`` and merge adjacent same-type tokens.

    The module-level ``ner_pipeline`` is called on ``text``; each item it
    returns is read as a dict with the keys ``'entity'``, ``'score'``,
    ``'index'``, ``'word'``, ``'start'`` and ``'end'``.  A two-character
    prefix ending in ``'-'`` (e.g. ``'B-'`` / ``'I-'``) is removed from the
    entity label, and consecutive tokens are fused into one span when the
    previous span ends exactly where the next token starts and both carry
    the same label.

    Args:
        text: The sentence to analyse.

    Returns:
        ``{"text": text, "entities": spans}`` where ``spans`` is a list of
        dicts with the same keys as the pipeline items.  For a merged span
        the score is the minimum of its parts, ``'index'`` is that of the
        last token, and ``'word'`` is the concatenation of the parts.
        ``spans`` is empty when the pipeline yields no entities.
    """
    spans = []
    # Iterating (instead of indexing element 0 first) keeps an empty
    # pipeline result from raising IndexError.
    for token in ner_pipeline(text):
        label = token['entity']
        # Slice rather than index so a one-character label cannot raise.
        if label[1:2] == '-':
            label = label[2:]

        previous = spans[-1] if spans else None
        is_continuation = (
            previous is not None
            and previous['end'] == token['start']
            and previous['entity'] == label
        )
        if is_continuation:
            # Extend the last span to cover this token as well.
            spans[-1] = {
                'entity': label,
                'score': min(previous['score'], token['score']),
                'index': token['index'],
                'word': previous['word'] + token['word'],
                'start': previous['start'],
                'end': token['end'],
            }
        else:
            # Copy so the pipeline's own result dicts are not mutated.
            spans.append({**token, 'entity': label})

    return {"text": text, "entities": spans}

# Build the web UI: a free-text box feeds `ner`, whose result is rendered
# as highlighted text; the sample sentences are offered as examples.
demo = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=gr.HighlightedText(),
    examples=examples,
)

demo.launch()