Spaces:
Runtime error
Runtime error
File size: 3,921 Bytes
894b24d d4df546 894b24d 10eaeda f43384a 894b24d d3f1526 894b24d d3f1526 d4df546 4b3d121 9b360b3 c9f9a75 b094a6f c9f9a75 b094a6f c9f9a75 953205d d4df546 cc4118d d4df546 52ded96 894b24d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
from metrics import calc_metrics
import gradio as gr
from openai import OpenAI
import os
from transformers import pipeline
# from dotenv import load_dotenv, find_dotenv
import huggingface_hub
import json
# from simcse import SimCSE # use for gpt
from evaluate_data import store_sample_data, get_metrics_trf
# store_sample_data()
# Load the bundled evaluation samples. Each record is expected to look like
# {'id': "", 'text': "", 'orgs': ["", ""]}.
# The encoding is pinned to UTF-8 so decoding does not depend on the host
# locale's default encoding.
with open('./data/sample_data.json', 'r', encoding='utf-8') as f:
    sample_data = json.load(f)

# _ = load_dotenv(find_dotenv()) # read local .env file

# HF_TOKEN is mandatory: indexing os.environ raises KeyError when it is unset.
hf_token = os.environ['HF_TOKEN']
huggingface_hub.login(hf_token)

# Token-classification pipeline used for the transformer-based predictions.
pipe = pipeline(
    "token-classification",
    model="elshehawy/finer-ord-transformers",
    aggregation_strategy="first",
)

# Chat model used by default for the GPT-based extraction.
llm_model = 'gpt-3.5-turbo-0125'

# OPENAI_API_KEY is read with .get(), so api_key is None here when the
# variable is unset.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
def get_completion(prompt, model=llm_model):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the ``content`` of the one user message.
        model: Chat model name; defaults to the module-level ``llm_model``.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # Every request is issued with temperature=0.
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def find_orgs_gpt(sentence):
    """Ask the chat model for the organizations mentioned in ``sentence``.

    The model is instructed to answer with a JSON list of strings. The reply
    is parsed as JSON as-is first; if that fails, the outermost ``[...]`` span
    of the reply is parsed instead, so a reply wrapped in code fences or
    surrounded by extra prose is still accepted.

    Args:
        sentence: The text to search for organization names.

    Returns:
        The decoded JSON value of the reply (a list of organization names
        when the model follows the requested format).

    Raises:
        json.JSONDecodeError: If neither the full reply nor its bracketed
            span is valid JSON.
    """
    prompt = f"""
In context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.
text:
```
{sentence}
```
You should output only a list of organizations and follow this output format exactly: ["org_1", "org_2", "org_3"]
"""
    sent_orgs_str = get_completion(prompt)
    try:
        # Fast path: the reply is exactly the requested JSON.
        return json.loads(sent_orgs_str)
    except json.JSONDecodeError:
        # Fallback: pull out the outermost bracketed list, if there is one.
        start = sent_orgs_str.find('[')
        end = sent_orgs_str.rfind(']')
        if start == -1 or end < start:
            # Nothing list-like to recover; surface the original error.
            raise
        return json.loads(sent_orgs_str[start:end + 1])
# def find_orgs_trf(sentence):
# org_list = []
# for ent in pipe(sentence):
# if ent['entity_group'] == 'ORG':
# # message += f'\n- {ent["word"]} \t- score: {ent["score"]}'
# # message += f'\n- {ent["word"]}'# \t- score: {ent["score"]}'
# org_list.append(ent['word'])
# return list(set(org_list))
# Reference organization lists, one entry per sample, in sample order.
true_orgs = [record['orgs'] for record in sample_data]
# GPT predictions for the same samples; find_orgs_gpt is called once per sample.
predicted_orgs_gpt = [find_orgs_gpt(record['text']) for record in sample_data]
# predicted_orgs_trf = [find_orgs_trf(sent['text']) for sent in sample_data]

# Module-level container for computed metrics; nothing is stored in it here
# because both metric computations below are commented out.
all_metrics = dict()
# sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
# all_metrics['gpt'] = calc_metrics(true_orgs, predicted_orgs_gpt, sim_model)
print('Finiding all metrics trf')
# all_metrics['trf'] = get_metrics_trf()

# Sample input text kept at module level.
example = """
My latest exclusive for The Hill : Conservative frustration over Republican efforts to force a House vote on reauthorizing the Export - Import Bank boiled over Wednesday during a contentious GOP meeting.
"""
def find_orgs(uploaded_file):
    """Evaluate an uploaded JSON dataset and return the metrics as text.

    Args:
        uploaded_file: The uploaded JSON payload. Raw ``bytes`` (what a
            ``type='binary'`` upload delivers), ``str`` and file-like objects
            with a ``read`` method are all accepted.

    Returns:
        ``str(all_metrics)``, where ``all_metrics['trf']`` holds the result of
        ``get_metrics_trf`` on the uploaded data. Returning the text lets the
        interface's "text" output show the metrics instead of staying empty.

    Raises:
        json.JSONDecodeError: If the payload is not valid JSON.
    """
    print('=*'*80)
    print(type(uploaded_file))
    print(uploaded_file)

    # json.load() needs a file-like object while json.loads() takes
    # str/bytes directly; choose explicitly instead of relying on a bare
    # ``except`` to fall through from one to the other.
    if hasattr(uploaded_file, 'read'):
        uploaded_data = json.load(uploaded_file)
    else:
        uploaded_data = json.loads(uploaded_file)

    all_metrics = {}
    all_metrics['trf'] = get_metrics_trf(uploaded_data)

    # NOTE(review): presumably store_sample_data rewrites
    # ./data/sample_data.json from the upload, which is then re-read below;
    # confirm against evaluate_data.
    store_sample_data(uploaded_data)
    with open('./data/sample_data.json', 'r', encoding='utf-8') as f:
        sample_data = json.load(f)

    # Collected for the GPT metrics; currently unused because the
    # calc_metrics call below is commented out.
    gpt_orgs, true_orgs = [], []
    for sent in sample_data:
        gpt_orgs.append(find_orgs_gpt(sent['text']))
        true_orgs.append(sent['orgs'])
    # sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
    # all_metrics['gpt'] = calc_metrics(true_orgs, gpt_orgs, sim_model)
    return str(all_metrics)
# radio_btn = gr.Radio(choices=['GPT', 'iSemantics'], value='iSemantics', label='Available models', show_label=True)
# textbox = gr.Textbox(label="Enter your text", placeholder=str(all_metrics), lines=8)
# Upload widget configured with type='binary'.
upload_btn = gr.UploadButton(type='binary', label='Upload a json file.')

# Route uploads through find_orgs and render its return value as text.
iface = gr.Interface(
    fn=find_orgs,
    inputs=upload_btn,
    outputs="text",
)
iface.launch(share=True)
|