Spaces:
Sleeping
Sleeping
File size: 1,854 Bytes
f0c6f53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import jsonlines
# Source JSONL file holding the classified records.
input_file = "data/thirdStep_file.jsonl"
# Destination JSONL file that receives the transformed records.
output_file = "data/Full-Labeled-Data-Final-4465.jsonl"
# Score cut-off: process_record accepts a label only when its score is
# strictly greater than this value.
threshold = 0.21
# Option templates attached to every output record. Each entry carries an
# identifier, a display text, and a placeholder "meta" string.
options = [
    {"id": category_id, "text": display_text, "meta": "0.00"}
    for category_id, display_text in (
        ("CapitalRequirements", "Capital Requirements"),
        ("ConsumerProtection", "Consumer Protection"),
        ("RiskManagement", "Risk Management"),
        ("ReportingAndCompliance", "Reporting And Compliance"),
        ("CorporateGovernance", "Corporate Governance"),
    )
]
# Function to process each record
def process_record(record, min_score=None, categories=None):
    """Reshape one classified record into the labeled output layout.

    Args:
        record: Mapping with a "text" entry and a "predicted_labels" entry;
            the latter maps each label name to its score.
        min_score: Cut-off a score must strictly exceed for its label to be
            accepted. Defaults to the module-level ``threshold``.
        categories: Option templates (dicts with "id", "text" and "meta")
            attached to the output. Defaults to the module-level ``options``.

    Returns:
        A dict with the keys "text", "cats", "accept", "answer" and
        "options". "answer" is "accept" when at least one label passed the
        cut-off and "reject" otherwise.

    Raises:
        KeyError: If ``record`` lacks "text" or "predicted_labels", or an
            option template lacks "id", "text" or "meta".
    """
    # Fall back to the file-level configuration when the caller does not
    # override it, so existing one-argument calls behave as before.
    if min_score is None:
        min_score = threshold
    if categories is None:
        categories = options

    # Extract text and predicted labels
    text = record["text"]
    predicted_labels = record["predicted_labels"]

    # Determine accepted categories based on the cut-off (strict comparison:
    # a score equal to the cut-off is not accepted).
    accepted_categories = [
        label for label, score in predicted_labels.items() if score > min_score
    ]

    # Determine answer based on accepted categories
    answer = "accept" if accepted_categories else "reject"

    # Build fresh dicts per record so output records never share (and cannot
    # mutate) the template entries.
    # NOTE(review): "meta" is copied verbatim from the template and is never
    # replaced by the record's score -- confirm that is intended.
    options_with_meta = [
        {"id": option["id"], "text": option["text"], "meta": option["meta"]}
        for option in categories
    ]

    # Construct the output record
    output_record = {
        "text": text,
        "cats": predicted_labels,
        "accept": accepted_categories,
        "answer": answer,
        "options": options_with_meta,
    }
    return output_record
# Process input file and write transformed data to output file
# Both files are managed by a single `with`, so they are closed even if a
# record fails to process. Records are read, transformed and written one at a
# time rather than being collected into a list first.
with jsonlines.open(input_file, "r") as infile, jsonlines.open(output_file, "w") as outfile:
    for record in infile:
        output_record = process_record(record)
        outfile.write(output_record)
|