elshehawy committed on
Commit
953205d
·
1 Parent(s): b094a6f

Update app.py and evaluate_data.py

Browse files
Files changed (2) hide show
  1. app.py +19 -2
  2. evaluate_data.py +3 -8
app.py CHANGED
@@ -45,7 +45,6 @@ def get_completion(prompt, model=llm_model):
45
  return response.choices[0].message.content
46
 
47
 
48
-
49
  def find_orgs_gpt(sentence):
50
  prompt = f"""
51
  In context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.
@@ -106,7 +105,25 @@ def find_orgs(uploaded_file):
106
  print(uploaded_file.decode())
107
  uploaded_data = json.loads(uploaded_file)
108
 
109
- return get_metrics_trf(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # radio_btn = gr.Radio(choices=['GPT', 'iSemantics'], value='iSemantics', label='Available models', show_label=True)
111
  # textbox = gr.Textbox(label="Enter your text", placeholder=str(all_metrics), lines=8)
112
  upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')
 
45
  return response.choices[0].message.content
46
 
47
 
 
48
  def find_orgs_gpt(sentence):
49
  prompt = f"""
50
  In context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.
 
105
  print(uploaded_file.decode())
106
  uploaded_data = json.loads(uploaded_file)
107
 
108
+
109
+ all_metrics = {}
110
+ all_metrics['trf'] = get_metrics_trf(uploaded_data)
111
+
112
+ store_sample_data(uploaded_data)
113
+ with open('./data/sample_data.json', 'r') as f:
114
+ sample_data = json.load(f)
115
+
116
+ gpt_orgs, true_orgs = [], []
117
+
118
+ for sent in sample_data:
119
+ gpt_orgs.append(find_orgs_gpt(sent['text']))
120
+ true_orgs.append(sent['orgs'])
121
+
122
+
123
+ # sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
124
+ # all_metrics['gpt'] = calc_metrics(true_orgs, gpt_orgs, sim_model)
125
+
126
+ return
127
  # radio_btn = gr.Radio(choices=['GPT', 'iSemantics'], value='iSemantics', label='Available models', show_label=True)
128
  # textbox = gr.Textbox(label="Enter your text", placeholder=str(all_metrics), lines=8)
129
  upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')
evaluate_data.py CHANGED
@@ -34,11 +34,8 @@ with open(feature_path, 'rb') as f:
34
 
35
  ner_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
36
 
37
-
38
-
39
  # tokenized_dataset.set_format('torch')
40
 
41
-
42
  def collate_fn(data):
43
  input_ids = [(element['input_ids']) for element in data]
44
  attention_mask = [element['attention_mask'] for element in data]
@@ -56,7 +53,7 @@ def get_metrics_trf(data):
56
  print(device)
57
 
58
  data = Dataset.from_dict(data)
59
-
60
  tokenized_data = data.map(
61
  tokenize_and_align_labels,
62
  batched=True,
@@ -90,9 +87,7 @@ def get_metrics_trf(data):
90
  # json.dump(all_metrics, f)
91
 
92
 
93
-
94
-
95
- def find_orgs(tokens, labels):
96
  orgs = []
97
  prev_tok_id = 0
98
  for i, (token, label) in enumerate(zip(tokens, labels)):
@@ -129,5 +124,5 @@ def store_sample_data(data):
129
  'orgs': sent_orgs
130
  })
131
 
132
- with open('data/sample_data.json', 'w') as f:
133
  json.dump(test_data, f)
 
34
 
35
  ner_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
36
 
 
 
37
  # tokenized_dataset.set_format('torch')
38
 
 
39
  def collate_fn(data):
40
  input_ids = [(element['input_ids']) for element in data]
41
  attention_mask = [element['attention_mask'] for element in data]
 
53
  print(device)
54
 
55
  data = Dataset.from_dict(data)
56
+
57
  tokenized_data = data.map(
58
  tokenize_and_align_labels,
59
  batched=True,
 
87
  # json.dump(all_metrics, f)
88
 
89
 
90
+ def find_orgs_in_data(tokens, labels):
 
 
91
  orgs = []
92
  prev_tok_id = 0
93
  for i, (token, label) in enumerate(zip(tokens, labels)):
 
124
  'orgs': sent_orgs
125
  })
126
 
127
+ with open('./data/sample_data.json', 'w') as f:
128
  json.dump(test_data, f)