File size: 14,014 Bytes
ea99abb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
import gradio as gr
from utils.ner_helpers import is_llm_model
import pandas as pd
import json
from typing import Dict, List
from tasks.pos_tagging import pos_tagging
from utils.pos_helpers import *
# POS UI
# Builds the part-of-speech tagging panel: input controls, tag filter, model
# picker and submit button here, followed by the output tabs, scoped CSS and
# event wiring further down.
# NOTE(review): the indentation of this file has been lost, so the nesting of
# the `with` layout contexts below cannot be read off the text — confirm the
# intended nesting against the upstream file before editing.
def pos_ui():
# UI Components
with gr.Row():
# Input-side column (scale=2): text box, examples, tag filter, model choice.
with gr.Column(scale=2):
input_text = gr.Textbox(
label="Input Text",
lines=8,
placeholder="Enter text to analyze for part-of-speech tags...",
elem_id="pos-input-text"
)
# Example sentences that populate input_text.
gr.Examples(
examples=[
["The cat is sitting on the mat."],
["She quickly finished her homework before dinner."]
],
inputs=[input_text],
label="Examples"
)
# Tag selection: one checkbox per key of POS_TAG_DESCRIPTIONS, with
# DEFAULT_SELECTED_TAGS pre-checked. The selection is passed to process_pos
# as `selected_tags` by the submit handler.
with gr.Group():
tag_selection = gr.CheckboxGroup(
label="POS Tags to Display",
# choices=[(f"{tag} - {desc}", tag) for tag, desc in POS_TAG_DESCRIPTIONS.items()],
choices=[tag for tag in POS_TAG_DESCRIPTIONS.keys()],
value=DEFAULT_SELECTED_TAGS,
interactive=True
)
# Bulk-selection buttons; their click handlers are registered below.
with gr.Row():
select_all_btn = gr.Button("Select All", size="sm")
clear_all_btn = gr.Button("Clear All", size="sm")
# Model selection at the bottom
with gr.Row():
model_dropdown = gr.Dropdown(
POS_MODELS,
value=DEFAULT_MODEL,
label="Model",
interactive=True,
elem_id="pos-model-dropdown"
)
# Free-text instructions; process_pos forwards them only when the selected
# model is an LLM, and update_ui toggles this box's visibility accordingly.
custom_instructions = gr.Textbox(
label="Custom Instructions (optional)",
lines=2,
placeholder="Add any custom instructions for the model...",
elem_id="pos-custom-instructions"
)
# Submit button
submit_btn = gr.Button("Tag Text", variant="primary", elem_id="pos-submit-btn")
# Button event handlers
def select_all_tags():
    """Check every available POS tag in the tag filter.

    Returns:
        A ``gr.CheckboxGroup`` update whose value is every key of
        ``POS_TAG_DESCRIPTIONS``, the same collection the checkbox group
        is built from.
    """
    # "Select All" has to cover all choices; DEFAULT_SELECTED_TAGS is only
    # the initial selection and would leave non-default tags unchecked.
    return gr.CheckboxGroup(value=list(POS_TAG_DESCRIPTIONS.keys()))
def clear_all_tags():
    """Uncheck every POS tag in the tag filter.

    Returns:
        A ``gr.CheckboxGroup`` update carrying an empty selection.
    """
    nothing_selected = []
    return gr.CheckboxGroup(value=nothing_selected)
# Both buttons take no inputs and overwrite the tag_selection checkbox group
# with the value returned by their handler.
select_all_btn.click(
fn=select_all_tags,
outputs=[tag_selection]
)
clear_all_btn.click(
fn=clear_all_tags,
outputs=[tag_selection]
)
# Output-side column (scale=3). Each tab starts with a visible placeholder
# message and a hidden result component; process_pos returns visibility
# updates for all four components on every run.
with gr.Column(scale=3):
# Results container with tabs
# NOTE(review): `output_tabs` is not referenced again in the visible code.
with gr.Tabs() as output_tabs:
with gr.Tab("Tagged View", id="tagged-view"):
# Placeholder shown before the first run.
no_results_html = gr.HTML(
"<div style='text-align: center; color: #666; padding: 20px;'>"
"Enter text and click 'Tag Text' to analyze.</div>",
visible=True
)
# Highlighted-text view; its elem_id is the prefix used by the CSS rules below.
output_html = gr.HTML(
label="POS Tags",
elem_id="pos-output-html",
visible=False
)
with gr.Tab("Table View", id="table-view"):
# Placeholder shown before the first run.
no_results_table = gr.HTML(
"<div style='text-align: center; color: #666; padding: 20px;'>"
"Enter text and click 'Tag Text' to analyze.</div>",
visible=True
)
# Read-only two-column table; the headers match the DataFrame columns
# produced by format_pos_result.
output_table = gr.Dataframe(
label="POS Tags",
headers=["Token", "POS Tag"],
datatype=["str", "str"],
interactive=False,
wrap=True,
elem_id="pos-output-table",
visible=False
)
# Add CSS for the POS tags (scoped to this component)
# Every selector is prefixed with #pos-output-html, the elem_id of output_html.
# `.pos-highlight` is the class used by the status messages in process_pos.
# NOTE(review): in the visible code format_pos_result emits inline styles and
# no class attributes, so the .pos-token / .token-text / .pos-tag and per-tag
# rules (.NOUN, .VERB, ...) do not appear to match any generated markup.
gr.HTML("""
<style>
#pos-output-html .pos-highlight {
white-space: pre-wrap;
line-height: 1.8;
font-size: 14px;
padding: 15px;
border: 1px solid #e0e0e0;
border-radius: 4px;
background: #f9f9f9;
}
#pos-output-html .pos-token {
display: inline-block;
margin: 0 2px 4px 0;
vertical-align: top;
text-align: center;
}
#pos-output-html .token-text {
display: block;
padding: 2px 8px;
background: #f0f4f8;
border-radius: 4px 4px 0 0;
border: 1px solid #dbe4ed;
border-bottom: none;
font-size: 0.9em;
}
#pos-output-html .pos-tag {
display: block;
padding: 2px 8px;
border-radius: 0 0 4px 4px;
font-size: 0.8em;
font-family: 'Courier New', monospace;
border: 1px solid;
border-top: none;
}
/* Color coding for common POS tags */
#pos-output-html .NOUN { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
#pos-output-html .VERB { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
#pos-output-html .ADJ { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
#pos-output-html .ADV { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
#pos-output-html .PRON { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
#pos-output-html .DET { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
#pos-output-html .ADP { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
#pos-output-html .CONJ, #pos-output-html .CCONJ, #pos-output-html .SCONJ { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
#pos-output-html .NUM { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
#pos-output-html .PUNCT { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
#pos-output-html .X, #pos-output-html .SYM { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
#pos-output-html .PROPN { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
#pos-output-html .AUX { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
#pos-output-html .PART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
#pos-output-html .INTJ { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
</style>
""")
def format_pos_result(result, selected_tags=None):
    """Render a POS-tagging result as highlighted HTML plus a table.

    Args:
        result: Mapping with parallel ``"tokens"`` and ``"tags"`` sequences.
            A falsy value, or a mapping missing either key, is treated as
            an invalid result.
        selected_tags: Tags to highlight in the HTML and keep in the table.
            ``None`` selects every key of ``POS_TAG_DESCRIPTIONS``.

    Returns:
        A ``(html, df)`` tuple. ``html`` is the full text with selected
        tokens wrapped in coloured spans. ``df`` is a DataFrame with the
        columns ``"Token"`` and ``"POS Tag"`` holding only the selected
        rows. The columns are always present, even when there are no rows.
    """
    # Function-scope import: the module does not import html at top level.
    # pandas is deliberately NOT re-imported here: a local `import pandas as
    # pd` makes `pd` function-local and turns the early return below into an
    # UnboundLocalError. The module-level `pd` is used instead.
    import html

    columns = ["Token", "POS Tag"]
    if not result or "tokens" not in result or "tags" not in result:
        return "<div style='text-align: center; color: #666; padding: 20px;'>No POS tags found or invalid result format.</div>", pd.DataFrame(columns=columns)
    if selected_tags is None:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
    pos_colors = {
        "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
        "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
        "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
        "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
        "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
        "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0"
    }
    html_parts = ['<div style="line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;">']
    rows = []
    for word, tag in zip(result["tokens"], result["tags"]):
        # Reduce tags such as "NOUN-xyz" or "noun_xyz" to their leading part;
        # str() keeps a non-string tag (e.g. None) from crashing the split.
        clean_tag = str(tag).split('-')[0].split('_')[0].upper()
        if clean_tag not in STANDARD_POS_TAGS:
            clean_tag = "X"
        safe_word = html.escape(str(word))
        if clean_tag not in selected_tags:
            # Unselected tokens stay in the text flow, without highlighting.
            html_parts.append(f'{safe_word} ')
            continue
        rows.append({"Token": word, "POS Tag": clean_tag})
        color = pos_colors.get(clean_tag, "#f0f0f0")
        html_parts.append(f'<span style="background:{color};border-radius:3px;padding:0 2px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);">')
        html_parts.append(f'{safe_word} ')
        html_parts.append(f'<span style="font-size:0.7em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);">{clean_tag}</span>')
        html_parts.append('</span>')
    html_parts.append('</div>')
    # Passing `columns` keeps the schema stable when no row was selected;
    # without it an empty result has no "POS Tag" column at all.
    df = pd.DataFrame(rows, columns=columns)
    return "".join(html_parts), df
def process_pos(text: str, model: str, custom_instructions: str, selected_tags: list):
    """Tag *text* and stream UI updates for the four output components.

    This is a generator. Every yielded list has one entry per output
    component, in the order ``[output_html, no_results_html, output_table,
    no_results_table]`` used by the submit handler's ``outputs``.

    Args:
        text: Raw input text; ``None``, empty or whitespace-only input is
            rejected with a validation message.
        model: Model name; ``is_llm_model`` decides whether it is an LLM.
        custom_instructions: Extra instructions, forwarded to the tagger
            only when the model is an LLM.
        selected_tags: Tags to display; an empty selection means every key
            of ``POS_TAG_DESCRIPTIONS``.

    Yields:
        A "processing" update first, then exactly one final update: the
        rendered result, a "nothing extracted" notice, the backend's error
        message, or a generic failure notice.
    """
    # Function-scope import; only the backend error text needs escaping.
    from html import escape

    def _message_only(markup):
        # Show `markup` in the tagged view and hide the other three outputs.
        return [
            gr.HTML(markup, visible=True),  # output_html
            gr.HTML(visible=False),  # no_results_html
            gr.DataFrame(visible=False),  # output_table
            gr.HTML(visible=False)  # no_results_table
        ]

    # Inside a generator `return <value>` only ends the iteration and never
    # reaches the UI, so the validation message has to be yielded first.
    if not text or not text.strip():
        yield _message_only("<div style='color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>")
        return
    use_llm = is_llm_model(model)
    if not selected_tags:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
    try:
        yield _message_only("<div class='pos-highlight'>Processing... This may take a moment for large texts.</div>")
        result = pos_tagging(
            text=text,
            model=model,
            custom_instructions=custom_instructions if use_llm else "",
            use_llm=use_llm
        )
        if "error" in result:
            error_msg = str(result['error'])
            if "API key" in error_msg or "authentication" in error_msg.lower():
                error_msg += " Please check your API key configuration."
            # Backend error text is not trusted markup; escape it before
            # embedding it in the HTML component.
            yield _message_only(f"<div style='color: #d32f2f; padding: 20px;'>{escape(error_msg)}</div>")
            return
        rendered_html, table = format_pos_result(result, selected_tags)
        if not table.empty:
            yield [
                gr.HTML(rendered_html, visible=True),  # output_html
                gr.HTML(visible=False),  # no_results_html
                gr.DataFrame(value=table, visible=True),  # output_table
                gr.HTML(visible=False)  # no_results_table
            ]
        else:
            empty_msg = "<div class='pos-highlight' style='text-align: center; color: #666; padding: 20px;'>No POS tags could be extracted from the text.</div>"
            yield [
                gr.HTML(empty_msg, visible=True),  # output_html
                gr.HTML(visible=False),  # no_results_html
                gr.DataFrame(visible=False),  # output_table
                gr.HTML(empty_msg, visible=True)  # no_results_table
            ]
    except Exception as e:
        # UI boundary: log the full traceback server-side and show the user
        # a generic message instead of internal details.
        import traceback
        error_msg = f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)  # Log the full error
        yield _message_only("<div class='pos-highlight' style='color: #d32f2f; padding: 20px;'>An error occurred while processing your request. Please try again.</div>")
def update_ui(model_name: str) -> Dict:
    """Show or hide the custom-instructions box for the chosen model.

    Args:
        model_name: Name of the model currently selected in the dropdown.

    Returns:
        A one-entry mapping from the ``custom_instructions`` component to a
        ``gr.Textbox`` update that is visible only when ``is_llm_model``
        reports the model as an LLM.
    """
    show_instructions = is_llm_model(model_name)
    updates = {}
    updates[custom_instructions] = gr.Textbox(visible=show_instructions)
    return updates
def clear_inputs():
    """Return three empty strings, one per field to be reset.

    NOTE(review): this helper is not attached to any event in the visible
    code, so which three components it targets is not shown here.
    """
    blank = ""
    return blank, blank, blank
# Re-evaluate the visibility of the custom-instructions box whenever the
# selected model changes (update_ui shows it only for LLM models).
model_dropdown.change(
fn=update_ui,
inputs=[model_dropdown],
outputs=[custom_instructions]
)
# process_pos contains `yield` statements, i.e. it is a generator; each list
# it yields carries one update per component, in the order of `outputs` below.
submit_btn.click(
fn=process_pos,
inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
outputs=[output_html, no_results_html, output_table, no_results_table],
show_progress=True
)
# Styling for the result tabs.
# NOTE(review): #tagged-view / #table-view reuse the `id=` values passed to
# gr.Tab above — confirm Gradio renders those as DOM ids. The .tab-nav rules
# carry no element-id prefix, so they are not limited to this panel's tabs.
gr.HTML("""
<style>
/* Style for the tabs */
#tagged-view, #table-view {
padding: 15px;
}
/* Make the tabs more visible */
.tab-nav {
margin-bottom: 10px;
border-bottom: 1px solid #e0e0e0;
}
.tab-nav button {
padding: 8px 16px;
margin-right: 5px;
border: 1px solid #e0e0e0;
background: #f5f5f5;
border-radius: 4px 4px 0 0;
cursor: pointer;
}
.tab-nav button.selected {
background: #ffffff;
border-bottom: 2px solid #0e7490;
font-weight: bold;
}
</style>
""")
# Initial visibility of the custom-instructions box, using the same rule as
# update_ui: visible only when DEFAULT_MODEL is an LLM.
# NOTE(review): this assigns the attribute after the component was created —
# confirm Gradio honours a post-construction change to `visible`.
custom_instructions.visible = is_llm_model(DEFAULT_MODEL)
# The components are created for their side effect on the enclosing layout;
# nothing is handed back to the caller.
return None
|