# Ling/ui/pos_ui.py
# Nam Fam
# update files
# ea99abb
import gradio as gr
from utils.ner_helpers import is_llm_model
import pandas as pd
import json
from typing import Dict, List
from tasks.pos_tagging import pos_tagging
from utils.pos_helpers import *
# POS UI
def pos_ui():
# Builds the part-of-speech tagging interface: the input widgets below, the
# result views, and the event handlers that connect them.
# NOTE(review): leading indentation is missing in this copy of the file, so the
# nesting of the `with` containers is not visible here and must be restored
# before the module can be imported.
# UI Components
with gr.Row():
with gr.Column(scale=2):
# Free-text input that will be sent to the tagger.
input_text = gr.Textbox(
label="Input Text",
lines=8,
placeholder="Enter text to analyze for part-of-speech tags...",
elem_id="pos-input-text"
)
# Sample sentences bound to input_text.
gr.Examples(
examples=[
["The cat is sitting on the mat."],
["She quickly finished her homework before dinner."]
],
inputs=[input_text],
label="Examples"
)
# Tag selection
with gr.Group():
# One checkbox per key of POS_TAG_DESCRIPTIONS; DEFAULT_SELECTED_TAGS is
# the initial selection.
tag_selection = gr.CheckboxGroup(
label="POS Tags to Display",
# choices=[(f"{tag} - {desc}", tag) for tag, desc in POS_TAG_DESCRIPTIONS.items()],
choices=[tag for tag in POS_TAG_DESCRIPTIONS.keys()],
value=DEFAULT_SELECTED_TAGS,
interactive=True
)
with gr.Row():
select_all_btn = gr.Button("Select All", size="sm")
clear_all_btn = gr.Button("Clear All", size="sm")
# Model selection at the bottom
with gr.Row():
model_dropdown = gr.Dropdown(
POS_MODELS,
value=DEFAULT_MODEL,
label="Model",
interactive=True,
elem_id="pos-model-dropdown"
)
# Extra prompt text; process_pos forwards it only when is_llm_model(model)
# is true.
custom_instructions = gr.Textbox(
label="Custom Instructions (optional)",
lines=2,
placeholder="Add any custom instructions for the model...",
elem_id="pos-custom-instructions"
)
# Submit button
submit_btn = gr.Button("Tag Text", variant="primary", elem_id="pos-submit-btn")
# Button event handlers
def select_all_tags():
    """Check every tag offered by the "POS Tags to Display" checkbox group.

    Returns:
        A ``gr.CheckboxGroup`` update whose value is the full list of keys of
        ``POS_TAG_DESCRIPTIONS`` (the same source the group's choices use).
    """
    # "Select All" has to tick every available choice, not only the default
    # selection that the group starts with.
    return gr.CheckboxGroup(value=list(POS_TAG_DESCRIPTIONS.keys()))
def clear_all_tags():
    """Return a checkbox-group update with no tag selected."""
    nothing_selected = []
    return gr.CheckboxGroup(value=nothing_selected)
# Each button replaces the current value of tag_selection with whatever its
# handler returns.
for tag_button, tag_handler in (
    (select_all_btn, select_all_tags),
    (clear_all_btn, clear_all_tags),
):
    tag_button.click(fn=tag_handler, outputs=[tag_selection])
# Output side of the layout: two tabs, each holding a placeholder message and
# the real result component.
# NOTE(review): indentation is missing in this copy; restore the nesting of the
# `with` containers before running.
with gr.Column(scale=3):
# Results container with tabs
with gr.Tabs() as output_tabs:
with gr.Tab("Tagged View", id="tagged-view"):
# Placeholder text, visible when the UI is first built.
no_results_html = gr.HTML(
"<div style='text-align: center; color: #666; padding: 20px;'>"
"Enter text and click 'Tag Text' to analyze.</div>",
visible=True
)
# Highlighted output; created hidden.
output_html = gr.HTML(
label="POS Tags",
elem_id="pos-output-html",
visible=False
)
with gr.Tab("Table View", id="table-view"):
# Placeholder text for the table tab, visible when the UI is first built.
no_results_table = gr.HTML(
"<div style='text-align: center; color: #666; padding: 20px;'>"
"Enter text and click 'Tag Text' to analyze.</div>",
visible=True
)
# Read-only two-column table (token, tag); created hidden.
output_table = gr.Dataframe(
label="POS Tags",
headers=["Token", "POS Tag"],
datatype=["str", "str"],
interactive=False,
wrap=True,
elem_id="pos-output-table",
visible=False
)
# Stylesheet for the POS output; every selector is rooted at #pos-output-html,
# the elem_id of the tagged-view HTML component.
pos_output_css = """
<style>
#pos-output-html .pos-highlight {
white-space: pre-wrap;
line-height: 1.8;
font-size: 14px;
padding: 15px;
border: 1px solid #e0e0e0;
border-radius: 4px;
background: #f9f9f9;
}
#pos-output-html .pos-token {
display: inline-block;
margin: 0 2px 4px 0;
vertical-align: top;
text-align: center;
}
#pos-output-html .token-text {
display: block;
padding: 2px 8px;
background: #f0f4f8;
border-radius: 4px 4px 0 0;
border: 1px solid #dbe4ed;
border-bottom: none;
font-size: 0.9em;
}
#pos-output-html .pos-tag {
display: block;
padding: 2px 8px;
border-radius: 0 0 4px 4px;
font-size: 0.8em;
font-family: 'Courier New', monospace;
border: 1px solid;
border-top: none;
}
/* Color coding for common POS tags */
#pos-output-html .NOUN { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
#pos-output-html .VERB { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
#pos-output-html .ADJ { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
#pos-output-html .ADV { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
#pos-output-html .PRON { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
#pos-output-html .DET { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
#pos-output-html .ADP { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
#pos-output-html .CONJ, #pos-output-html .CCONJ, #pos-output-html .SCONJ { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
#pos-output-html .NUM { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
#pos-output-html .PUNCT { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
#pos-output-html .X, #pos-output-html .SYM { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
#pos-output-html .PROPN { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
#pos-output-html .AUX { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
#pos-output-html .PART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
#pos-output-html .INTJ { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
</style>
"""
gr.HTML(pos_output_css)
def format_pos_result(result, selected_tags=None):
    """Render a POS-tagging result as highlighted HTML plus a token table.

    Each tag is normalised by keeping the text before the first ``-`` or ``_``
    and upper-casing it; anything not in ``STANDARD_POS_TAGS`` becomes ``"X"``.

    Args:
        result: Mapping with parallel ``"tokens"`` and ``"tags"`` sequences.
        selected_tags: Tags to highlight in the HTML and keep in the table.
            ``None`` selects every key of ``POS_TAG_DESCRIPTIONS``.

    Returns:
        A ``(html, dataframe)`` tuple. Tokens whose tag is not selected appear
        in the HTML as plain escaped text and are left out of the dataframe.
        A missing or malformed ``result`` yields an explanatory message and an
        empty dataframe with the ``Token`` / ``POS Tag`` columns.
    """
    import html

    # `pd` is the module-level pandas import. Re-importing it inside this
    # function would make the name local and break the early return below
    # with UnboundLocalError.
    columns = ["Token", "POS Tag"]

    if not result or "tokens" not in result or "tags" not in result:
        return (
            "<div style='text-align: center; color: #666; padding: 20px;'>"
            "No POS tags found or invalid result format.</div>",
            pd.DataFrame(columns=columns),
        )

    if selected_tags is None:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
    selected = set(selected_tags)

    pos_colors = {
        "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
        "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
        "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
        "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
        "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
        "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0",
    }

    html_parts = ['<div style="line-height:1.6;padding:15px;border:1px solid #e0e0e0;border-radius:4px;background:#f9f9f9;white-space:pre-wrap;">']
    rows = []
    for word, tag in zip(result["tokens"], result["tags"]):
        # str() keeps non-string tokens/tags from crashing split()/escape().
        clean_tag = str(tag).split('-')[0].split('_')[0].upper()
        if clean_tag not in STANDARD_POS_TAGS:
            clean_tag = "X"
        rows.append({"Token": word, "POS Tag": clean_tag})

        escaped_word = html.escape(str(word))
        if clean_tag not in selected:
            html_parts.append(f'{escaped_word} ')
            continue

        color = pos_colors.get(clean_tag, "#f0f0f0")
        html_parts.append(f'<span style="background:{color};border-radius:3px;padding:0 2px;margin:0 1px;border:1px solid rgba(0,0,0,0.1);">')
        html_parts.append(f'{escaped_word} ')
        html_parts.append(f'<span style="font-size:0.7em;font-weight:bold;color:#555;border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);">{clean_tag}</span>')
        html_parts.append('</span>')
    html_parts.append('</div>')

    # Passing `columns` keeps the "POS Tag" column present even when there are
    # no rows, so the filter below cannot raise KeyError.
    df = pd.DataFrame(rows, columns=columns)
    df = df[df["POS Tag"].isin(selected_tags)].reset_index(drop=True)
    return "".join(html_parts), df
def process_pos(text: str, model: str, custom_instructions: str, selected_tags: list):
    """Tag *text* and stream updates for the four result components.

    This is a generator: it first yields a "Processing..." status, then the
    final result or an error message.

    Args:
        text: Text to tag. Empty, whitespace-only or ``None`` input produces a
            prompt to enter text.
        model: Model identifier; ``is_llm_model(model)`` decides whether the
            custom instructions are forwarded.
        custom_instructions: Extra instructions, passed on only for LLM models.
        selected_tags: Tags to display. An empty selection falls back to every
            key of ``POS_TAG_DESCRIPTIONS``.

    Yields:
        A 4-item list of component updates in the order
        ``[output_html, no_results_html, output_table, no_results_table]``.
    """
    import traceback
    from html import escape

    def status(message_html: str) -> list:
        """Show one message in the tagged view and hide the other outputs."""
        return [
            gr.HTML(message_html, visible=True),  # output_html
            gr.HTML(visible=False),  # no_results_html
            gr.DataFrame(visible=False),  # output_table
            gr.HTML(visible=False),  # no_results_table
        ]

    if not text or not text.strip():
        # Inside a generator a `return [...]` only ends iteration and the list
        # is never delivered, so the message has to be yielded.
        yield status("<div style='color: #f44336; padding: 20px;'>Please enter some text to analyze.</div>")
        return

    use_llm = is_llm_model(model)
    if not selected_tags:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())

    try:
        yield status("<div class='pos-highlight'>Processing... This may take a moment for large texts.</div>")

        result = pos_tagging(
            text=text,
            model=model,
            custom_instructions=custom_instructions if use_llm else "",
            use_llm=use_llm,
        )

        if "error" in result:
            error_msg = str(result["error"])
            if "API key" in error_msg or "authentication" in error_msg.lower():
                error_msg += " Please check your API key configuration."
            # The message comes from the tagging backend; escape it before
            # embedding it in markup.
            yield status(f"<div style='color: #d32f2f; padding: 20px;'>{escape(error_msg)}</div>")
            return

        tagged_html, table = format_pos_result(result, selected_tags)
        if not table.empty:
            yield [
                gr.HTML(tagged_html, visible=True),  # output_html
                gr.HTML(visible=False),  # no_results_html
                gr.DataFrame(value=table, visible=True),  # output_table
                gr.HTML(visible=False),  # no_results_table
            ]
        else:
            empty_msg = "<div class='pos-highlight' style='text-align: center; color: #666; padding: 20px;'>No POS tags could be extracted from the text.</div>"
            yield [
                gr.HTML(empty_msg, visible=True),  # output_html
                gr.HTML(visible=False),  # no_results_html
                gr.DataFrame(visible=False),  # output_table
                gr.HTML(empty_msg, visible=True),  # no_results_table
            ]
    except Exception as e:
        # UI boundary: log the full traceback, show the user a generic message.
        error_msg = f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)  # Log the full error
        yield status("<div class='pos-highlight' style='color: #d32f2f; padding: 20px;'>An error occurred while processing your request. Please try again.</div>")
def update_ui(model_name: str) -> Dict:
    """Return an update that shows the custom-instructions box only for LLM models.

    Args:
        model_name: Currently selected model identifier.

    Returns:
        A mapping from the ``custom_instructions`` component to its update.
    """
    show_instructions = is_llm_model(model_name)
    instructions_update = gr.Textbox(visible=show_instructions)
    return {custom_instructions: instructions_update}
def clear_inputs():
    """Return a tuple of three empty strings."""
    blank = ""
    return blank, blank, blank
# Re-evaluate the custom-instructions visibility whenever the model changes.
model_dropdown.change(
    update_ui,
    inputs=[model_dropdown],
    outputs=[custom_instructions],
)

# Run the tagger on click; the output list order matches what process_pos yields.
tagging_inputs = [input_text, model_dropdown, custom_instructions, tag_selection]
tagging_outputs = [output_html, no_results_html, output_table, no_results_table]
submit_btn.click(
    process_pos,
    inputs=tagging_inputs,
    outputs=tagging_outputs,
    show_progress=True,
)
# Stylesheet for the two result tabs and the tab navigation bar.
tab_css = """
<style>
/* Style for the tabs */
#tagged-view, #table-view {
padding: 15px;
}
/* Make the tabs more visible */
.tab-nav {
margin-bottom: 10px;
border-bottom: 1px solid #e0e0e0;
}
.tab-nav button {
padding: 8px 16px;
margin-right: 5px;
border: 1px solid #e0e0e0;
background: #f5f5f5;
border-radius: 4px 4px 0 0;
cursor: pointer;
}
.tab-nav button.selected {
background: #ffffff;
border-bottom: 2px solid #0e7490;
font-weight: bold;
}
</style>
"""
gr.HTML(tab_css)
# Initial state: the custom-instructions box is visible only when the default
# model is an LLM model (same predicate update_ui applies on later changes).
custom_instructions.visible = is_llm_model(DEFAULT_MODEL)
# No value is returned to the caller.
return None