Upload folder using huggingface_hub
Browse files- app.py +25 -15
- utils/__init__.py +23 -9
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
app.py
CHANGED
@@ -8,16 +8,23 @@ all = load_dataset("raminass/full_opinions_1994_2020")
|
|
8 |
df = pd.DataFrame(all["train"])
|
9 |
choices = []
|
10 |
for index, row in df[df.category == "per_curiam"].iterrows():
|
11 |
-
choices.append((f"""{row["case_name"]}""", row["text"]))
|
12 |
|
13 |
max_textboxes = 100
|
14 |
|
15 |
|
16 |
# https://www.gradio.app/guides/controlling-layout
|
17 |
-
def greet(opinion):
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
chunks = chunk_data(remove_citations(opinion))["text"].to_list()
|
19 |
-
result = average_text(chunks, pipe)
|
20 |
k = len(chunks)
|
|
|
21 |
wrt_boxes = []
|
22 |
for i in range(k):
|
23 |
wrt_boxes.append(gr.Textbox(chunks[i], visible=True))
|
@@ -30,30 +37,33 @@ def greet(opinion):
|
|
30 |
|
31 |
|
32 |
def set_input(drop):
|
33 |
-
return drop
|
34 |
|
35 |
|
36 |
with gr.Blocks() as demo:
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
41 |
|
42 |
-
greet_btn = gr.Button("Predict")
|
43 |
textboxes = []
|
44 |
for i in range(max_textboxes):
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
49 |
textboxes.append(t)
|
50 |
textboxes.append(par_level)
|
51 |
|
52 |
-
drop.select(set_input, inputs=drop, outputs=[opinion])
|
53 |
|
54 |
greet_btn.click(
|
55 |
fn=greet,
|
56 |
-
inputs=opinion,
|
57 |
outputs=[op_level] + textboxes,
|
58 |
)
|
59 |
|
|
|
8 |
df = pd.DataFrame(all["train"])
|
9 |
choices = []
|
10 |
for index, row in df[df.category == "per_curiam"].iterrows():
|
11 |
+
choices.append((f"""{row["case_name"]}""", [row["text"], row["year_filed"]]))
|
12 |
|
13 |
max_textboxes = 100
|
14 |
|
15 |
|
16 |
# https://www.gradio.app/guides/controlling-layout
|
17 |
+
def greet(opinion, year):
|
18 |
+
judges_l = (
|
19 |
+
df[(df["year_filed"] == year) & (df["category"] != "per_curiam")]
|
20 |
+
.author_name.unique()
|
21 |
+
.tolist()
|
22 |
+
)
|
23 |
+
|
24 |
chunks = chunk_data(remove_citations(opinion))["text"].to_list()
|
25 |
+
result = average_text(chunks, pipe, judges_l)
|
26 |
k = len(chunks)
|
27 |
+
|
28 |
wrt_boxes = []
|
29 |
for i in range(k):
|
30 |
wrt_boxes.append(gr.Textbox(chunks[i], visible=True))
|
|
|
37 |
|
38 |
|
39 |
def set_input(drop):
    """Unpack the selected dropdown value into its two components.

    The dropdown choices are built with a two-item value
    (``[row["text"], row["year_filed"]]``), and this callback feeds the
    ``opinion`` and ``year`` components, in that order.

    Args:
        drop: Indexable value of the selected dropdown entry.

    Returns:
        A tuple ``(drop[0], drop[1])``.
    """
    opinion_text, year_filed = drop[0], drop[1]
    return opinion_text, year_filed
|
41 |
|
42 |
|
43 |
with gr.Blocks() as demo:
|
44 |
+
with gr.Row():
|
45 |
+
with gr.Column():
|
46 |
+
opinion = gr.Textbox(label="Opinion")
|
47 |
+
year = gr.Slider(1994, 2020, label="Year")
|
48 |
+
drop = gr.Dropdown(choices=sorted(choices))
|
49 |
+
greet_btn = gr.Button("Predict")
|
50 |
+
op_level = gr.outputs.Label(num_top_classes=13, label="Overall")
|
51 |
|
|
|
52 |
textboxes = []
|
53 |
for i in range(max_textboxes):
|
54 |
+
with gr.Row():
|
55 |
+
t = gr.Textbox(f"Textbox {i}", visible=False, label=f"Paragraph {i+1} Text")
|
56 |
+
par_level = gr.Label(
|
57 |
+
num_top_classes=5, label=f"Paragraph {i+1} Prediction", visible=False
|
58 |
+
)
|
59 |
textboxes.append(t)
|
60 |
textboxes.append(par_level)
|
61 |
|
62 |
+
drop.select(set_input, inputs=drop, outputs=[opinion, year])
|
63 |
|
64 |
greet_btn.click(
|
65 |
fn=greet,
|
66 |
+
inputs=[opinion, year],
|
67 |
outputs=[op_level] + textboxes,
|
68 |
)
|
69 |
|
utils/__init__.py
CHANGED
@@ -10,19 +10,33 @@ with open("utils/label2id.json", "r") as j:
|
|
10 |
label2id = json.loads(j.read())
|
11 |
|
12 |
|
13 |
-
def
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
15 |
result = model(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
pred = {}
|
17 |
-
for c in
|
18 |
-
for
|
19 |
-
if
|
20 |
-
pred[
|
21 |
else:
|
22 |
-
pred[
|
23 |
sumary = {k: round(sum(v) / len(v), 2) for k, v in pred.items()}
|
24 |
-
|
25 |
-
return dict(sorted(sumary.items(), key=lambda x: x[1], reverse=True)),
|
26 |
|
27 |
|
28 |
# def find_case_by_name(df, name):
|
|
|
10 |
label2id = json.loads(j.read())
|
11 |
|
12 |
|
13 |
+
def normaliz_dict(d, target=1.0):
    """Rescale the values of ``d`` so that they sum to ``target``.

    Args:
        d: Mapping of keys to numeric values.
        target: Desired sum of the returned values (defaults to 1.0).

    Returns:
        A new dict with the same keys, each value multiplied by
        ``target / sum(d.values())``. When the values sum to zero (which
        includes an empty ``d``) there is no valid scaling factor, so an
        unscaled copy of ``d`` is returned instead of raising
        ``ZeroDivisionError``.
    """
    raw = sum(d.values())
    if raw == 0:
        # Nothing to scale by: hand back a copy so callers can still treat
        # the result as a fresh dict independent of the input.
        return dict(d)
    factor = target / raw
    return {key: value * factor for key, value in d.items()}
|
17 |
+
|
18 |
+
|
19 |
+
def average_text(text, model, judges):
    """Score text chunks with ``model`` and average the scores per judge.

    ``model(text)`` is iterated as one sequence per chunk, each containing
    mappings with ``"label"`` and ``"score"`` keys (presumably the output of
    a text-classification pipeline that returns every label's score -- TODO
    confirm against the caller). Only labels present in ``judges`` are kept.

    Args:
        text: Input passed straight to ``model`` (the chunks to classify).
        model: Callable producing the per-chunk label/score mappings.
        judges: Container of labels that are eligible for this prediction.

    Returns:
        A tuple ``(summary, per_chunk)`` where ``per_chunk`` is a list with
        one ``{label: score}`` dict per chunk, rescaled to sum to 1, and
        ``summary`` maps each label to its mean per-chunk score, rescaled to
        sum to 1 and ordered from highest to lowest score. A chunk with no
        eligible label yields an empty dict, and a distribution whose values
        sum to zero is left unscaled rather than raising
        ``ZeroDivisionError``.
    """
    result = model(text)

    new_res = []
    for d in result:
        p = {
            dicts["label"]: round(dicts["score"], 2)
            for dicts in d
            if dicts["label"] in judges
        }
        # An empty or all-zero distribution cannot be rescaled; keep it as is
        # so the output stays aligned with the input chunks.
        if sum(p.values()):
            p = normaliz_dict(p)
        new_res.append(p)

    # Collect every chunk's score per label so they can be averaged.
    pred = {}
    for c in new_res:
        for k, v in c.items():
            pred.setdefault(k, []).append(round(v, 2))

    sumary = {k: round(sum(v) / len(v), 2) for k, v in pred.items()}
    if sum(sumary.values()):
        sumary = normaliz_dict(sumary)
    return dict(sorted(sumary.items(), key=lambda x: x[1], reverse=True)), new_res
|
40 |
|
41 |
|
42 |
# def find_case_by_name(df, name):
|
utils/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ
|
|