Wedyan2023 committed on
Commit
e85a22f
·
verified ·
1 Parent(s): 4b87947

Update app103.py

Browse files
Files changed (1) hide show
  1. app103.py +95 -18
app103.py CHANGED
@@ -1240,23 +1240,62 @@ if "task_choice" in st.session_state:
1240
  'task_type': 'Named Entity Recognition (NER)',
1241
  'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1242
  })
 
 
1243
 
1244
- #new 24/4/2025
1245
- # Save and provide download options
1246
- if labeled_examples:
1247
- # Update session state
1248
- st.session_state.labeled_examples = labeled_examples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1249
 
1250
- # Convert to CSV and JSON
1251
- df = pd.DataFrame(labeled_examples)
 
 
 
 
 
 
 
1252
 
1253
  ####
1254
 
1255
  ######
1256
 
1257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258
  else:
1259
- labeled_examples = []
1260
  for line in response.split('\n'):
1261
  if line.strip():
1262
  parts = line.rsplit('Label:', 1)
@@ -1272,13 +1311,15 @@ if "task_choice" in st.session_state:
1272
  'task_type': 'Data Labeling',
1273
  'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1274
  })
1275
- # Save and provide download options
1276
- if labeled_examples:
1277
- # Update session state
1278
- st.session_state.labeled_examples = labeled_examples
 
 
1279
 
1280
- # Convert to CSV and JSON
1281
- df = pd.DataFrame(labeled_examples)
1282
 
1283
  # ########3
1284
  # if labeled_examples:
@@ -1292,18 +1333,54 @@ if "task_choice" in st.session_state:
1292
  # file_name='labeled_examples.csv',
1293
  # mime='text/csv'
1294
  # )
1295
- ###########
1296
- #new 22/4/2025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1297
  # CSV
1298
  st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
1299
-
1300
  # JSON
1301
  st.session_state.labeled_examples_json = json.dumps({
1302
  "metadata": {
1303
  "domain": domain,
1304
  "labels": labels,
1305
  "used_few_shot": use_few_shot,
1306
- "task_type": "Named Entity Recognition (NER)",
1307
  "timestamp": datetime.now().isoformat()
1308
  },
1309
  "examples": labeled_examples
 
1240
  'task_type': 'Named Entity Recognition (NER)',
1241
  'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1242
  })
1243
+ ###newnewnewnew
1244
+ labeled_examples = []
1245
 
1246
+ if classification_type == "Named Entity Recognition (NER)":
1247
+ ner_entities = []
1248
+ for line in response.strip().split('\n'):
1249
+ if line.strip():
1250
+ if '-' in line:
1251
+ entity_text, entity_type = line.rsplit('-', 1)
1252
+ ner_entities.append({
1253
+ 'entity': entity_text.strip(),
1254
+ 'label': entity_type.strip()
1255
+ })
1256
+ labeled_examples = [{
1257
+ 'ner_output': response.strip(),
1258
+ 'entities': ner_entities,
1259
+ 'system_prompt': st.session_state.system_prompt,
1260
+ 'system_role': st.session_state.system_role,
1261
+ 'task_type': 'Named Entity Recognition (NER)',
1262
+ 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1263
+ }]
1264
+
1265
 
1266
+
1267
+ # #new 24/4/2025
1268
+ # # Save and provide download options
1269
+ # if labeled_examples:
1270
+ # # Update session state
1271
+ # st.session_state.labeled_examples = labeled_examples
1272
+
1273
+ # # Convert to CSV and JSON
1274
+ # df = pd.DataFrame(labeled_examples)
1275
 
1276
  ####
1277
 
1278
  ######
1279
 
1280
 
1281
+ # else:
1282
+ # labeled_examples = []
1283
+ # for line in response.split('\n'):
1284
+ # if line.strip():
1285
+ # parts = line.rsplit('Label:', 1)
1286
+ # if len(parts) == 2:
1287
+ # text = parts[0].strip()
1288
+ # label = parts[1].strip()
1289
+ # if text and label:
1290
+ # labeled_examples.append({
1291
+ # 'text': text,
1292
+ # 'label': label,
1293
+ # 'system_prompt': st.session_state.system_prompt,
1294
+ # 'system_role': st.session_state.system_role,
1295
+ # 'task_type': 'Data Labeling',
1296
+ # 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1297
+ # })
1298
  else:
 
1299
  for line in response.split('\n'):
1300
  if line.strip():
1301
  parts = line.rsplit('Label:', 1)
 
1311
  'task_type': 'Data Labeling',
1312
  'Use few-shot example?': 'Yes' if use_few_shot else 'No',
1313
  })
1314
+
1315
+
1316
+ # # Save and provide download options
1317
+ # if labeled_examples:
1318
+ # # Update session state
1319
+ # st.session_state.labeled_examples = labeled_examples
1320
 
1321
+ # # Convert to CSV and JSON
1322
+ # df = pd.DataFrame(labeled_examples)
1323
 
1324
  # ########3
1325
  # if labeled_examples:
 
1333
  # file_name='labeled_examples.csv',
1334
  # mime='text/csv'
1335
  # )
1336
+ # "domain": domain,
1337
+ # "labels": labels,
1338
+ # "used_few_shot": use_few_shot,
1339
+ # "task_###########
1340
+ # #new 22/4/2025
1341
+ # # CSV
1342
+ # st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
1343
+
1344
+ # # JSON
1345
+ # st.session_state.labeled_examples_json = json.dumps({
1346
+ # "metadata": {
1347
+ # type": "Named Entity Recognition (NER)",
1348
+ # "timestamp": datetime.now().isoformat()
1349
+ # },
1350
+ # "examples": labeled_examples
1351
+ # }, indent=2).encode('utf-8')
1352
+ #########newnewnew
1353
+ # Save and provide download options
1354
+ if labeled_examples:
1355
+ st.session_state.labeled_examples = labeled_examples
1356
+
1357
+ if classification_type == "Named Entity Recognition (NER)":
1358
+ # Flatten NER entities for CSV
1359
+ flat_data = []
1360
+ for example in labeled_examples:
1361
+ for ent in example.get('entities', []):
1362
+ flat_data.append({
1363
+ 'entity': ent['entity'],
1364
+ 'label': ent['label'],
1365
+ 'system_prompt': example['system_prompt'],
1366
+ 'system_role': example['system_role'],
1367
+ 'task_type': example['task_type'],
1368
+ 'Use few-shot example?': example['Use few-shot example?']
1369
+ })
1370
+ df = pd.DataFrame(flat_data)
1371
+ else:
1372
+ df = pd.DataFrame(labeled_examples)
1373
+
1374
  # CSV
1375
  st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
1376
+
1377
  # JSON
1378
  st.session_state.labeled_examples_json = json.dumps({
1379
  "metadata": {
1380
  "domain": domain,
1381
  "labels": labels,
1382
  "used_few_shot": use_few_shot,
1383
+ "task_type": classification_type,
1384
  "timestamp": datetime.now().isoformat()
1385
  },
1386
  "examples": labeled_examples