Update app103.py
Browse files
app103.py
CHANGED
@@ -1240,23 +1240,62 @@ if "task_choice" in st.session_state:
|
|
1240 |
'task_type': 'Named Entity Recognition (NER)',
|
1241 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1242 |
})
|
|
|
|
|
1243 |
|
1244 |
-
|
1245 |
-
|
1246 |
-
|
1247 |
-
|
1248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1249 |
|
1250 |
-
|
1251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1252 |
|
1253 |
####
|
1254 |
|
1255 |
######
|
1256 |
|
1257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1258 |
else:
|
1259 |
-
labeled_examples = []
|
1260 |
for line in response.split('\n'):
|
1261 |
if line.strip():
|
1262 |
parts = line.rsplit('Label:', 1)
|
@@ -1272,13 +1311,15 @@ if "task_choice" in st.session_state:
|
|
1272 |
'task_type': 'Data Labeling',
|
1273 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1274 |
})
|
1275 |
-
|
1276 |
-
|
1277 |
-
|
1278 |
-
|
|
|
|
|
1279 |
|
1280 |
-
|
1281 |
-
|
1282 |
|
1283 |
# ########3
|
1284 |
# if labeled_examples:
|
@@ -1292,18 +1333,54 @@ if "task_choice" in st.session_state:
|
|
1292 |
# file_name='labeled_examples.csv',
|
1293 |
# mime='text/csv'
|
1294 |
# )
|
1295 |
-
|
1296 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1297 |
# CSV
|
1298 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
1299 |
-
|
1300 |
# JSON
|
1301 |
st.session_state.labeled_examples_json = json.dumps({
|
1302 |
"metadata": {
|
1303 |
"domain": domain,
|
1304 |
"labels": labels,
|
1305 |
"used_few_shot": use_few_shot,
|
1306 |
-
"task_type":
|
1307 |
"timestamp": datetime.now().isoformat()
|
1308 |
},
|
1309 |
"examples": labeled_examples
|
|
|
1240 |
'task_type': 'Named Entity Recognition (NER)',
|
1241 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1242 |
})
|
1243 |
+
###newnewnewnew
|
1244 |
+
labeled_examples = []
|
1245 |
|
1246 |
+
if classification_type == "Named Entity Recognition (NER)":
|
1247 |
+
ner_entities = []
|
1248 |
+
for line in response.strip().split('\n'):
|
1249 |
+
if line.strip():
|
1250 |
+
if '-' in line:
|
1251 |
+
entity_text, entity_type = line.rsplit('-', 1)
|
1252 |
+
ner_entities.append({
|
1253 |
+
'entity': entity_text.strip(),
|
1254 |
+
'label': entity_type.strip()
|
1255 |
+
})
|
1256 |
+
labeled_examples = [{
|
1257 |
+
'ner_output': response.strip(),
|
1258 |
+
'entities': ner_entities,
|
1259 |
+
'system_prompt': st.session_state.system_prompt,
|
1260 |
+
'system_role': st.session_state.system_role,
|
1261 |
+
'task_type': 'Named Entity Recognition (NER)',
|
1262 |
+
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1263 |
+
}]
|
1264 |
+
|
1265 |
|
1266 |
+
|
1267 |
+
# #new 24/4/2025
|
1268 |
+
# # Save and provide download options
|
1269 |
+
# if labeled_examples:
|
1270 |
+
# # Update session state
|
1271 |
+
# st.session_state.labeled_examples = labeled_examples
|
1272 |
+
|
1273 |
+
# # Convert to CSV and JSON
|
1274 |
+
# df = pd.DataFrame(labeled_examples)
|
1275 |
|
1276 |
####
|
1277 |
|
1278 |
######
|
1279 |
|
1280 |
|
1281 |
+
# else:
|
1282 |
+
# labeled_examples = []
|
1283 |
+
# for line in response.split('\n'):
|
1284 |
+
# if line.strip():
|
1285 |
+
# parts = line.rsplit('Label:', 1)
|
1286 |
+
# if len(parts) == 2:
|
1287 |
+
# text = parts[0].strip()
|
1288 |
+
# label = parts[1].strip()
|
1289 |
+
# if text and label:
|
1290 |
+
# labeled_examples.append({
|
1291 |
+
# 'text': text,
|
1292 |
+
# 'label': label,
|
1293 |
+
# 'system_prompt': st.session_state.system_prompt,
|
1294 |
+
# 'system_role': st.session_state.system_role,
|
1295 |
+
# 'task_type': 'Data Labeling',
|
1296 |
+
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1297 |
+
# })
|
1298 |
else:
|
|
|
1299 |
for line in response.split('\n'):
|
1300 |
if line.strip():
|
1301 |
parts = line.rsplit('Label:', 1)
|
|
|
1311 |
'task_type': 'Data Labeling',
|
1312 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
1313 |
})
|
1314 |
+
|
1315 |
+
|
1316 |
+
# # Save and provide download options
|
1317 |
+
# if labeled_examples:
|
1318 |
+
# # Update session state
|
1319 |
+
# st.session_state.labeled_examples = labeled_examples
|
1320 |
|
1321 |
+
# # Convert to CSV and JSON
|
1322 |
+
# df = pd.DataFrame(labeled_examples)
|
1323 |
|
1324 |
# ########3
|
1325 |
# if labeled_examples:
|
|
|
1333 |
# file_name='labeled_examples.csv',
|
1334 |
# mime='text/csv'
|
1335 |
# )
|
1336 |
+
# "domain": domain,
|
1337 |
+
# "labels": labels,
|
1338 |
+
# "used_few_shot": use_few_shot,
|
1339 |
+
# "task_###########
|
1340 |
+
# #new 22/4/2025
|
1341 |
+
# # CSV
|
1342 |
+
# st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
1343 |
+
|
1344 |
+
# # JSON
|
1345 |
+
# st.session_state.labeled_examples_json = json.dumps({
|
1346 |
+
# "metadata": {
|
1347 |
+
# type": "Named Entity Recognition (NER)",
|
1348 |
+
# "timestamp": datetime.now().isoformat()
|
1349 |
+
# },
|
1350 |
+
# "examples": labeled_examples
|
1351 |
+
# }, indent=2).encode('utf-8')
|
1352 |
+
#########newnewnew
|
1353 |
+
# Save and provide download options
|
1354 |
+
if labeled_examples:
|
1355 |
+
st.session_state.labeled_examples = labeled_examples
|
1356 |
+
|
1357 |
+
if classification_type == "Named Entity Recognition (NER)":
|
1358 |
+
# Flatten NER entities for CSV
|
1359 |
+
flat_data = []
|
1360 |
+
for example in labeled_examples:
|
1361 |
+
for ent in example.get('entities', []):
|
1362 |
+
flat_data.append({
|
1363 |
+
'entity': ent['entity'],
|
1364 |
+
'label': ent['label'],
|
1365 |
+
'system_prompt': example['system_prompt'],
|
1366 |
+
'system_role': example['system_role'],
|
1367 |
+
'task_type': example['task_type'],
|
1368 |
+
'Use few-shot example?': example['Use few-shot example?']
|
1369 |
+
})
|
1370 |
+
df = pd.DataFrame(flat_data)
|
1371 |
+
else:
|
1372 |
+
df = pd.DataFrame(labeled_examples)
|
1373 |
+
|
1374 |
# CSV
|
1375 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
1376 |
+
|
1377 |
# JSON
|
1378 |
st.session_state.labeled_examples_json = json.dumps({
|
1379 |
"metadata": {
|
1380 |
"domain": domain,
|
1381 |
"labels": labels,
|
1382 |
"used_few_shot": use_few_shot,
|
1383 |
+
"task_type": classification_type,
|
1384 |
"timestamp": datetime.now().isoformat()
|
1385 |
},
|
1386 |
"examples": labeled_examples
|