kcarnold committed on
Commit
dc6136e
·
1 Parent(s): 865abea

Upgrade highlight-edits

Browse files
Files changed (1) hide show
  1. app.py +36 -14
app.py CHANGED
@@ -72,18 +72,25 @@ def rewrite_with_predictions():
72
  st.button(token_display, on_click=append_token, args=(token,), key=i, use_container_width=True)
73
 
74
 
 
 
 
 
 
 
75
  def highlight_edits():
76
  st.title("Highlight locations for possible edits")
77
-
78
  import html
79
  prompt = get_prompt()
80
  st.write("Prompt:", prompt)
81
- doc = st.text_area("Document", placeholder="Paste your document here.")
82
- updated_doc = st.text_area("Updated Doc", placeholder="Your edited document. Leave this blank to use your original document.")
 
 
 
83
 
84
-
85
- response = requests.get("https://tools.kenarnold.org/api/highlights", params=dict(prompt=prompt, doc=doc, updated_doc=updated_doc))
86
- spans = response.json()['highlights']
87
 
88
  if len(spans) < 2:
89
  st.write("No spans found.")
@@ -93,19 +100,34 @@ def highlight_edits():
93
  for span in spans:
94
  span['loss_ratio'] = span['token_loss'] / highest_loss
95
 
 
 
 
 
 
 
 
 
 
 
 
96
  html_out = ''
97
  for span in spans:
98
- is_different = span['token'] != span['most_likely_token']
99
- html_out += '<span style="color: {color}" title="{title}">{orig_token}</span>'.format(
100
- color="blue" if is_different else "black",
101
- title=html.escape(span["most_likely_token"]).replace('\n', ' '),
102
- orig_token=html.escape(span["token"]).replace('\n', '<br>')
 
 
103
  )
104
- html_out = f"<p style=\"background: white;\">{html_out}</p>"
105
 
106
  st.write(html_out, unsafe_allow_html=True)
107
- import pandas as pd
108
- st.write(pd.DataFrame(spans)[['token', 'token_loss', 'most_likely_token', 'loss_ratio']])
 
 
109
 
110
 
111
  rewrite_page = st.Page(rewrite_with_predictions, title="Rewrite with predictions", icon="📝")
 
72
  st.button(token_display, on_click=append_token, args=(token,), key=i, use_container_width=True)
73
 
74
 
75
@st.cache_data
def get_highlights(prompt, doc, updated_doc):
    """Fetch highlight spans for a document from the remote highlights API.

    The result is wrapped in ``st.cache_data`` so that Streamlit reruns with
    the same arguments reuse the previous response instead of issuing a new
    HTTP request.

    Args:
        prompt: Prompt text sent as the ``prompt`` query parameter.
        doc: Original document text sent as the ``doc`` query parameter.
        updated_doc: Edited document text sent as the ``updated_doc`` query
            parameter.

    Returns:
        The value stored under the ``"highlights"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    response = requests.get(
        "https://tools.kenarnold.org/api/highlights",
        params=dict(prompt=prompt, doc=doc, updated_doc=updated_doc),
        # Without a timeout a stalled server would hang the app indefinitely.
        timeout=60,
    )
    # Fail with a clear HTTP error rather than a confusing KeyError or JSON
    # decode error when the service returns an error page.
    response.raise_for_status()
    return response.json()['highlights']
79
+
80
+
81
  def highlight_edits():
82
  st.title("Highlight locations for possible edits")
83
+
84
  import html
85
  prompt = get_prompt()
86
  st.write("Prompt:", prompt)
87
+ cols = st.columns(2)
88
+ with cols[0]:
89
+ doc = st.text_area("Document", "Deep learning neural network technology advances are pretty cool if you are careful to use it in ways that don't take stuff from people.", height=300)
90
+ with cols[1]:
91
+ updated_doc = st.text_area("Updated Doc", placeholder="Your edited document. Leave this blank to use your original document.", height=300)
92
 
93
+ spans = get_highlights(prompt, doc, updated_doc)
 
 
94
 
95
  if len(spans) < 2:
96
  st.write("No spans found.")
 
100
  for span in spans:
101
  span['loss_ratio'] = span['token_loss'] / highest_loss
102
 
103
+ num_different = sum(span['token'] != span['most_likely_token'] for span in spans)
104
+ loss_ratios_for_different = [span['loss_ratio'] for span in spans if span['token'] != span['most_likely_token']]
105
+ loss_ratios_for_different.sort(reverse=True)
106
+
107
+ if num_different == 0:
108
+ st.write("No possible edits found.")
109
+ st.stop()
110
+
111
+ num_to_show = st.slider("Number of edits to show", 1, num_different, value=num_different // 2)
112
+ min_loss = loss_ratios_for_different[num_to_show - 1]
113
+
114
  html_out = ''
115
  for span in spans:
116
+ show = span['token'] != span['most_likely_token'] and span['loss_ratio'] >= min_loss
117
+ hover = f'<span style="position: absolute; top: -10px; left: 5px; font-size: 10px; min-width:6em; line-height: 1; color: grey; transform-origin: left; transform: rotate(-15deg)">{span["most_likely_token"]}</span>'
118
+ html_out += '<span style="position: relative; color: {color}" title="{title}">{hover}{orig_token}</span>'.format(
119
+ color="blue" if show else "black",
120
+ title=html.escape(span["most_likely_token"]).replace('\n', ' ') if show else '',
121
+ orig_token=html.escape(span["token"]).replace('\n', '<br>'),
122
+ hover=hover if show else ''
123
  )
124
+ html_out = f"<p style=\"background: white; line-height: 2.5;\">{html_out}</p>"
125
 
126
  st.write(html_out, unsafe_allow_html=True)
127
+ if st.checkbox("Show details"):
128
+ import pandas as pd
129
+ st.write(pd.DataFrame(spans)[['token', 'token_loss', 'most_likely_token', 'loss_ratio']])
130
+ st.write("Token loss is the difference between the original token and the most likely token. The loss ratio is the token loss divided by the highest token loss in the document.")
131
 
132
 
133
  rewrite_page = st.Page(rewrite_with_predictions, title="Rewrite with predictions", icon="📝")