|
import json

import numpy as np
import pandas as pd

from .cleaning import remove_citations, split_data, split_text, chunk_data

# Mappings between class ids and label names.
with open("utils/id2label.json", "r") as j:
    id2label = json.load(j)

with open("utils/label2id.json", "r") as j:
    label2id = json.load(j)

|
def normaliz_dict(d, target=1.0):
    """Rescale the values of ``d`` so that they sum to ``target``."""
    raw = sum(d.values())
    factor = target / raw
    return {key: value * factor for key, value in d.items()}
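
# A minimal illustration with assumed values: normaliz_dict({"a": 2.0, "b": 6.0})
# returns {"a": 0.25, "b": 0.75}: the raw sum is 8.0, so each value is divided
# by 8.0 and the result sums to the default target of 1.0.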
|
|
|
|
|
def average_text(text, model):
    """Score each text chunk with ``model`` and average the label scores.

    Returns a tuple: the averaged scores sorted by descending probability,
    and the list of per-chunk normalized score dicts.
    """
    result = model(text)

    # Normalize the label scores of each chunk so that they sum to 1.0.
    new_res = []
    for chunk_scores in result:
        p = {entry["label"]: entry["score"] for entry in chunk_scores}
        new_res.append(normaliz_dict(p))

    # Collect the rounded per-chunk scores for every label.
    pred = {}
    for chunk in new_res:
        for k, v in chunk.items():
            pred.setdefault(k, []).append(round(v, 2))

    # Average per label, renormalize, and sort by descending score.
    summary = {k: round(sum(v) / len(v), 2) for k, v in pred.items()}
    summary = normaliz_dict(summary)
    return dict(sorted(summary.items(), key=lambda x: x[1], reverse=True)), new_res
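
# Usage sketch (assumptions flagged): ``model`` appears to be expected to behave
# like a Hugging Face ``text-classification`` pipeline that returns every label's
# score per input, i.e. a list of lists of {"label": ..., "score": ...} dicts.
# The model name below is a placeholder, not necessarily the one this project uses:
#
#     from transformers import pipeline
#
#     clf = pipeline(
#         "text-classification",
#         model="distilbert-base-uncased-finetuned-sst-2-english",  # placeholder model
#         top_k=None,  # return scores for all labels, not only the top one
#     )
#     chunks = ["first chunk of text", "second chunk of text"]
#     summary, per_chunk = average_text(chunks, clf)
#     print(summary)    # e.g. {"POSITIVE": 0.64, "NEGATIVE": 0.36}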
|
|