File size: 1,303 Bytes
81d4aee 8ddc567 81d4aee 8ddc567 81d4aee 8ddc567 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from .cleaning import remove_citations, split_data, split_text, chunk_data
import pandas as pd
import numpy as np
import json
# Load the id -> label and label -> id lookup tables once, at import time.
# The paths are relative to the current working directory, so the importing
# process must be started from the directory that contains ``utils/``.
# JSON is UTF-8 by specification; the encoding is passed explicitly so the
# result does not depend on the platform's locale default.
# NOTE(review): JSON object keys are always strings, so if id2label is keyed by
# numeric ids they will be str here -- confirm callers index it accordingly.
with open("utils/id2label.json", "r", encoding="utf-8") as j:
    id2label = json.load(j)
with open("utils/label2id.json", "r", encoding="utf-8") as j:
    label2id = json.load(j)
def average_text(text, model):
    """Average per-label scores over every chunk scored by ``model``.

    Args:
        text: Input forwarded unchanged to ``model``.
        model: Callable whose result is an iterable with one entry per scored
            chunk; each entry is an iterable of dicts carrying a ``"label"``
            and a numeric ``"score"``. (Presumably a Hugging Face
            text-classification pipeline configured to return every label's
            score -- confirm against the caller.)

    Returns:
        A ``(summary, per_chunk)`` tuple. ``summary`` maps each label to its
        mean score rounded to two decimals, ordered from the highest mean to
        the lowest. ``per_chunk`` is a list holding one
        ``{label: score rounded to two decimals}`` dict per chunk, in the
        order the model produced them.
    """
    scores_by_label = {}
    per_chunk = []
    # Single pass over the model output so that one-shot iterators are fully
    # consumed exactly once for both the summary and the per-chunk view.
    for chunk in model(text):
        rounded_chunk = {}
        for entry in chunk:
            label = entry["label"]
            score = entry["score"]
            # Keep the raw score for averaging: rounding each score first and
            # then rounding the mean again compounds the rounding error.
            scores_by_label.setdefault(label, []).append(score)
            rounded_chunk[label] = round(score, 2)
        per_chunk.append(rounded_chunk)

    summary = {
        label: round(sum(scores) / len(scores), 2)
        for label, scores in scores_by_label.items()
    }
    # sorted() is stable, so labels with equal means keep first-seen order.
    ranked = sorted(summary.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked), per_chunk
# def find_case_by_name(df, name):
# return display(
# HTML(
# df[df["case_name"].str.contains(name)]
# .iloc[:, :-1]
# .to_html(render_links=True, escape=False)
# )
# )
# def head_df(df):
# return display(
# HTML(df.iloc[:, :-1].head().to_html(render_links=True, escape=False))
# )
|