elshehawy committed on
Commit
72fee02
·
1 Parent(s): d735f80

update app.py file to work with orgs from gpt

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -7,18 +7,18 @@ from transformers import pipeline
7
  # from dotenv import load_dotenv, find_dotenv
8
  import huggingface_hub
9
  import json
10
- # from simcse import SimCSE # use for gpt
11
  from evaluate_data import store_sample_data, get_metrics_trf
12
 
13
  # store_sample_data()
14
 
15
 
16
 
17
- with open('./data/sample_data.json', 'r') as f:
18
- # sample_data = [
19
- # {'id': "", 'text': "", 'orgs': ["", ""]}
20
- # ]
21
- sample_data = json.load(f)
22
 
23
  # _ = load_dotenv(find_dotenv()) # read local .env file
24
  hf_token= os.environ['HF_TOKEN']
@@ -73,16 +73,16 @@ def find_orgs_gpt(sentence):
73
  # return list(set(org_list))
74
 
75
 
76
- true_orgs = [sent['orgs'] for sent in sample_data]
77
 
78
- predicted_orgs_gpt = [find_orgs_gpt(sent['text']) for sent in sample_data]
79
  # predicted_orgs_trf = [find_orgs_trf(sent['text']) for sent in sample_data]
80
 
81
- all_metrics = {}
82
 
83
  # sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
84
  # all_metrics['gpt'] = calc_metrics(true_orgs, predicted_orgs_gpt, sim_model)
85
- print('Finiding all metrics trf')
86
  # all_metrics['trf'] = get_metrics_trf()
87
 
88
 
@@ -92,38 +92,26 @@ My latest exclusive for The Hill : Conservative frustration over Republican effo
92
 
93
  """
94
  def find_orgs(uploaded_file):
95
- print('=*'*80)
96
- print(type(uploaded_file))
97
- # print(uploaded_file)
98
- try:
99
- print('inside try')
100
- # print(uploaded_file.decode())
101
- uploaded_data = json.loads(uploaded_file.decode())
102
 
103
- except:
104
- print('inside except')
105
- # print(uploaded_file.decode())
106
- uploaded_data = json.loads(uploaded_file)
107
-
108
-
109
- # all_metrics = {}
110
- # all_metrics['trf'] = get_metrics_trf(uploaded_data)
111
-
112
- # store_sample_data(uploaded_data)
113
  # with open('./data/sample_data.json', 'r') as f:
114
  # sample_data = json.load(f)
115
 
116
- # gpt_orgs, true_orgs = [], []
117
 
118
- # for sent in sample_data:
119
- # gpt_orgs.append(find_orgs_gpt(sent['text']))
120
- # true_orgs.append(sent['orgs'])
121
 
122
 
123
- # sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
124
- # all_metrics['gpt'] = calc_metrics(true_orgs, gpt_orgs, sim_model)
125
 
126
- return get_metrics_trf(uploaded_data)
127
  # radio_btn = gr.Radio(choices=['GPT', 'iSemantics'], value='iSemantics', label='Available models', show_label=True)
128
  # textbox = gr.Textbox(label="Enter your text", placeholder=str(all_metrics), lines=8)
129
  upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')
 
7
  # from dotenv import load_dotenv, find_dotenv
8
  import huggingface_hub
9
  import json
10
+ from simcse import SimCSE # use for gpt
11
  from evaluate_data import store_sample_data, get_metrics_trf
12
 
13
  # store_sample_data()
14
 
15
 
16
 
17
+ # with open('./data/sample_data.json', 'r') as f:
18
+ # # sample_data = [
19
+ # # {'id': "", 'text': "", 'orgs': ["", ""]}
20
+ # # ]
21
+ # sample_data = json.load(f)
22
 
23
  # _ = load_dotenv(find_dotenv()) # read local .env file
24
  hf_token= os.environ['HF_TOKEN']
 
73
  # return list(set(org_list))
74
 
75
 
76
+ # true_orgs = [sent['orgs'] for sent in sample_data]
77
 
78
+ # predicted_orgs_gpt = [find_orgs_gpt(sent['text']) for sent in sample_data]
79
  # predicted_orgs_trf = [find_orgs_trf(sent['text']) for sent in sample_data]
80
 
81
+ # all_metrics = {}
82
 
83
  # sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
84
  # all_metrics['gpt'] = calc_metrics(true_orgs, predicted_orgs_gpt, sim_model)
85
+ # print('Finiding all metrics trf')
86
  # all_metrics['trf'] = get_metrics_trf()
87
 
88
 
 
92
 
93
  """
94
  def find_orgs(uploaded_file):
95
+ uploaded_data = json.loads(uploaded_file)
96
+ all_metrics = {}
97
+ all_metrics['trf'] = get_metrics_trf(uploaded_data)
 
 
 
 
98
 
99
+
100
+ sample_data = store_sample_data(uploaded_data)
 
 
 
 
 
 
 
 
101
  # with open('./data/sample_data.json', 'r') as f:
102
  # sample_data = json.load(f)
103
 
104
+ gpt_orgs, true_orgs = [], []
105
 
106
+ for sent in sample_data:
107
+ gpt_orgs.append(find_orgs_gpt(sent['text']))
108
+ true_orgs.append(sent['orgs'])
109
 
110
 
111
+ sim_model = SimCSE('sentence-transformers/all-MiniLM-L6-v2')
112
+ all_metrics['gpt'] = calc_metrics(true_orgs, gpt_orgs, sim_model)
113
 
114
+ return all_metrics
115
  # radio_btn = gr.Radio(choices=['GPT', 'iSemantics'], value='iSemantics', label='Available models', show_label=True)
116
  # textbox = gr.Textbox(label="Enter your text", placeholder=str(all_metrics), lines=8)
117
  upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')