raminass committed on
Commit
fb7fb6c
·
1 Parent(s): 71645c3

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -8,16 +8,23 @@ all = load_dataset("raminass/full_opinions_1994_2020")
8
  df = pd.DataFrame(all["train"])
9
  choices = []
10
  for index, row in df[df.category == "per_curiam"].iterrows():
11
- choices.append((f"""{row["case_name"]}""", row["text"]))
12
 
13
  max_textboxes = 100
14
 
15
 
16
  # https://www.gradio.app/guides/controlling-layout
17
- def greet(opinion):
 
 
 
 
 
 
18
  chunks = chunk_data(remove_citations(opinion))["text"].to_list()
19
- result = average_text(chunks, pipe)
20
  k = len(chunks)
 
21
  wrt_boxes = []
22
  for i in range(k):
23
  wrt_boxes.append(gr.Textbox(chunks[i], visible=True))
@@ -30,30 +37,33 @@ def greet(opinion):
30
 
31
 
32
  def set_input(drop):
33
- return drop
34
 
35
 
36
  with gr.Blocks() as demo:
37
- opinion = gr.Textbox(label="Opinion")
38
- op_level = gr.outputs.Label(num_top_classes=13, label="Overall")
39
-
40
- drop = gr.Dropdown(choices=sorted(choices))
 
 
 
41
 
42
- greet_btn = gr.Button("Predict")
43
  textboxes = []
44
  for i in range(max_textboxes):
45
- t = gr.Textbox(f"Textbox {i}", visible=False, label=f"Paragraph {i+1} Text")
46
- par_level = gr.Label(
47
- num_top_classes=5, label=f"Paragraph {i+1} Prediction", visible=False
48
- )
 
49
  textboxes.append(t)
50
  textboxes.append(par_level)
51
 
52
- drop.select(set_input, inputs=drop, outputs=[opinion])
53
 
54
  greet_btn.click(
55
  fn=greet,
56
- inputs=opinion,
57
  outputs=[op_level] + textboxes,
58
  )
59
 
 
8
  df = pd.DataFrame(all["train"])
9
  choices = []
10
  for index, row in df[df.category == "per_curiam"].iterrows():
11
+ choices.append((f"""{row["case_name"]}""", [row["text"], row["year_filed"]]))
12
 
13
  max_textboxes = 100
14
 
15
 
16
  # https://www.gradio.app/guides/controlling-layout
17
+ def greet(opinion, year):
18
+ judges_l = (
19
+ df[(df["year_filed"] == year) & (df["category"] != "per_curiam")]
20
+ .author_name.unique()
21
+ .tolist()
22
+ )
23
+
24
  chunks = chunk_data(remove_citations(opinion))["text"].to_list()
25
+ result = average_text(chunks, pipe, judges_l)
26
  k = len(chunks)
27
+
28
  wrt_boxes = []
29
  for i in range(k):
30
  wrt_boxes.append(gr.Textbox(chunks[i], visible=True))
 
37
 
38
 
39
def set_input(drop):
    """Unpack a dropdown selection into its two components.

    The dropdown value is a sequence whose first element is the opinion
    text and whose second element is the year filed; both are returned
    so they can be routed to separate output components.
    """
    opinion_text, year_filed = drop[0], drop[1]
    return opinion_text, year_filed
41
 
42
 
43
  with gr.Blocks() as demo:
44
+ with gr.Row():
45
+ with gr.Column():
46
+ opinion = gr.Textbox(label="Opinion")
47
+ year = gr.Slider(1994, 2020, label="Year")
48
+ drop = gr.Dropdown(choices=sorted(choices))
49
+ greet_btn = gr.Button("Predict")
50
+ op_level = gr.outputs.Label(num_top_classes=13, label="Overall")
51
 
 
52
  textboxes = []
53
  for i in range(max_textboxes):
54
+ with gr.Row():
55
+ t = gr.Textbox(f"Textbox {i}", visible=False, label=f"Paragraph {i+1} Text")
56
+ par_level = gr.Label(
57
+ num_top_classes=5, label=f"Paragraph {i+1} Prediction", visible=False
58
+ )
59
  textboxes.append(t)
60
  textboxes.append(par_level)
61
 
62
+ drop.select(set_input, inputs=drop, outputs=[opinion, year])
63
 
64
  greet_btn.click(
65
  fn=greet,
66
+ inputs=[opinion, year],
67
  outputs=[op_level] + textboxes,
68
  )
69
 
utils/__init__.py CHANGED
@@ -10,19 +10,33 @@ with open("utils/label2id.json", "r") as j:
10
  label2id = json.loads(j.read())
11
 
12
 
13
- def average_text(text, model):
14
- # result = classifier(df_train[(df_train.case_name==case) & (df_train.category=='per_curiam')]['clean_text'].to_list())
 
 
 
 
 
15
  result = model(text)
 
 
 
 
 
 
 
 
 
16
  pred = {}
17
- for c in result:
18
- for d in c:
19
- if d["label"] not in pred:
20
- pred[d["label"]] = [round(d["score"], 2)]
21
  else:
22
- pred[d["label"]].append(round(d["score"], 2))
23
  sumary = {k: round(sum(v) / len(v), 2) for k, v in pred.items()}
24
- result = [{dct["label"]: round(dct["score"], 2) for dct in lst} for lst in result]
25
- return dict(sorted(sumary.items(), key=lambda x: x[1], reverse=True)), result
26
 
27
 
28
  # def find_case_by_name(df, name):
 
10
  label2id = json.loads(j.read())
11
 
12
 
13
def normaliz_dict(d, target=1.0):
    """Rescale the values of *d* proportionally so they sum to *target*.

    Args:
        d: Mapping of label -> numeric score.
        target: Desired sum of the returned values (default 1.0).

    Returns:
        A new dict with the same keys and proportionally scaled values.
        If the values sum to zero (e.g. an empty dict), a shallow copy of
        *d* is returned instead of raising ZeroDivisionError — this case
        is reachable from average_text when no label matches the judges
        filter.
    """
    raw = sum(d.values())
    if raw == 0:
        # Guard: the original divided by zero here on empty/all-zero input.
        return dict(d)
    factor = target / raw
    return {key: value * factor for key, value in d.items()}
17
+
18
+
19
def average_text(text, model, judges):
    """Classify each text chunk and aggregate per-judge scores.

    Args:
        text: List of text chunks to classify.
        model: Classifier pipeline; called as ``model(text)`` and expected
            to return, for each chunk, a list of ``{"label", "score"}``
            dicts (one entry per class).
        judges: Iterable of labels (judge names) to keep; scores for any
            other label are discarded before normalization.

    Returns:
        A 2-tuple of:
        - dict mapping label -> mean of the normalized per-chunk scores,
          renormalized to sum to 1 and sorted by score descending;
        - list of per-chunk ``{label: normalized_score}`` dicts.
    """
    result = model(text)

    # Hoist membership testing to a set: `judges` is a list and the `in`
    # check runs once per (chunk, class) pair.
    judge_set = set(judges)

    # Per chunk: keep only the requested judges, then renormalize so the
    # retained scores sum to 1.
    new_res = []
    for chunk_scores in result:
        kept = {
            d["label"]: round(d["score"], 2)
            for d in chunk_scores
            if d["label"] in judge_set
        }
        new_res.append(normaliz_dict(kept))

    # Group each judge's per-chunk scores (setdefault replaces the manual
    # if/else first-insert branch), then average per judge.
    pred = {}
    for chunk in new_res:
        for label, score in chunk.items():
            pred.setdefault(label, []).append(round(score, 2))
    sumary = {k: round(sum(v) / len(v), 2) for k, v in pred.items()}
    sumary = normaliz_dict(sumary)
    return dict(sorted(sumary.items(), key=lambda x: x[1], reverse=True)), new_res
40
 
41
 
42
  # def find_case_by_name(df, name):
utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ