File size: 2,251 Bytes
f5ecfb4
 
 
 
5a85444
 
 
f5ecfb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42a117e
f5ecfb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50d7ee8
f5ecfb4
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import gradio as gr

# Hugging Face model identifier shared by the tokenizer and the model below.
model_name = "valurank/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
# "ner" pipeline built from the objects above. Its output is consumed by ner(),
# which passes it through join_results() and rename_group(); the latter reads
# the "entity_group" key from each returned entry.
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

def rename_group(output_list):
    """Return copies of the entity dicts with ``entity_group`` renamed to ``entity``.

    Args:
        output_list: Iterable of entity dicts, each holding an
            ``"entity_group"`` key.

    Returns:
        A new list of new dicts. Each dict carries the same items as its
        source, except that the value stored under ``"entity_group"`` is now
        stored under ``"entity"``. The dicts passed in are left unmodified.

    Raises:
        KeyError: If an entry has no ``"entity_group"`` key.
    """
    final_output = []

    for output in output_list:
        # Work on a shallow copy so the caller's dicts are not changed behind
        # its back.
        renamed = dict(output)
        renamed["entity"] = renamed.pop("entity_group")
        final_output.append(renamed)

    return final_output

def remove_prefix(word, prefix):
    """Strip a leading ``prefix`` from ``word``, or mark ``word`` as a new token.

    Args:
        word: Token text to normalise.
        prefix: Marker expected at the very start of ``word``
            (``join_results`` passes ``"##"``).

    Returns:
        ``word`` without ``prefix`` when ``word`` starts with it; otherwise
        ``word`` preceded by a single space.
    """
    # Only a genuine prefix is removed. A marker that appears later in the
    # word is left alone so that the text before it is never dropped.
    if word.startswith(prefix):
        return word[len(prefix):]

    return " " + word

def join_results(results):
    """Fold entries whose word contains ``"##"`` into the entry before them.

    Args:
        results: Iterable of dicts with ``"word"``, ``"end"`` and ``"score"``
            keys.

    Returns:
        A list holding the entries that were kept. A kept entry is the same
        dict object that was passed in; when a following ``"##"`` entry is
        folded into it, its ``"end"`` is replaced by the folded entry's, its
        ``"word"`` is extended with ``remove_prefix(word, "##")`` and its
        ``"score"`` becomes the smaller of the two scores.
    """
    merged = []

    for piece in results:
        is_continuation = "##" in piece["word"]

        # Nothing to attach to (or not a continuation): keep the entry as is.
        if not (is_continuation and merged):
            merged.append(piece)
            continue

        previous = merged[-1]
        previous["end"] = piece["end"]
        previous["word"] += remove_prefix(piece["word"], "##")
        previous["score"] = min(previous["score"], piece["score"])

    return merged

# Sample input passed to gr.Interface via its `examples` argument.
# The text is runtime data and is kept verbatim.
examples = [
    """ Texas A&M professor used chatbot chatbot to assess students' grades.
    The OpenAI chatbot is actually called ChatGPT and claims to have written every paper written by the bot.
    The bot isn’t made to detect material composed by AI, or even material produced by itself.
    Texas A&M University-Commerce said they are investigating the incident and developing policies related to AI in the classroom.
    The university denied that anyone had received a failing grade.
    The school also confirmed that several students had been cleared of any academic dishonesty.
    The use of AI in coursework is a rapidly changing issue that confronts all learning institutions."""
]


def ner(text):
    """Run the NER pipeline on ``text`` and package the result for display.

    Args:
        text: Input string handed to the ``nlp`` pipeline.

    Returns:
        A dict with the original ``"text"`` and an ``"entities"`` list, the
        pipeline output after ``join_results`` and then ``rename_group``.
    """
    entities = rename_group(join_results(nlp(text)))
    return {"text": text, "entities": entities}

# Gradio interface wired to ner(): a Textbox component and a HighlightedText
# component are passed after the function, and `examples` supplies the
# sample text defined above.
demo = gr.Interface(ner,
             gr.Textbox(placeholder="Enter sentence here..."), 
             gr.HighlightedText(),
             examples=examples)

# Launch the interface only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
  demo.launch(debug=True)