import gradio as gr
from utils.ner_helpers import is_llm_model
import pandas as pd
import json
from typing import Dict, List
from tasks.pos_tagging import pos_tagging
from utils.pos_helpers import *
# Build the part-of-speech (POS) tagging UI.
# Creates the input controls (text box, example sentences, tag checkboxes,
# model dropdown, custom-instructions box, submit button), the tabbed result
# views, and wires the event handlers defined further down in this function.
# Returns None. Presumably meant to be called inside an active gr.Blocks
# context -- confirm against the caller.
def pos_ui():
# UI Components
with gr.Row():
with gr.Column(scale=2):
# Free-text input; its value is the first input of the submit handler.
input_text = gr.Textbox(
label="Input Text",
lines=8,
placeholder="Enter text to analyze for part-of-speech tags...",
elem_id="pos-input-text"
)
# Sample sentences that populate input_text.
gr.Examples(
examples=[
["The cat is sitting on the mat."],
["She quickly finished her homework before dinner."]
],
inputs=[input_text],
label="Examples"
)
# Tag selection
# Choices are the keys of POS_TAG_DESCRIPTIONS; DEFAULT_SELECTED_TAGS are
# pre-checked.
with gr.Group():
tag_selection = gr.CheckboxGroup(
label="POS Tags to Display",
# choices=[(f"{tag} - {desc}", tag) for tag, desc in POS_TAG_DESCRIPTIONS.items()],
choices=[tag for tag in POS_TAG_DESCRIPTIONS.keys()],
value=DEFAULT_SELECTED_TAGS,
interactive=True
)
# Shortcut buttons that act on the checkbox group above.
with gr.Row():
select_all_btn = gr.Button("Select All", size="sm")
clear_all_btn = gr.Button("Clear All", size="sm")
# Model selection at the bottom
with gr.Row():
model_dropdown = gr.Dropdown(
POS_MODELS,
value=DEFAULT_MODEL,
label="Model",
interactive=True,
elem_id="pos-model-dropdown"
)
# Optional extra instructions; process_pos forwards them to the tagger only
# when is_llm_model() reports that the selected model is an LLM.
custom_instructions = gr.Textbox(
label="Custom Instructions (optional)",
lines=2,
placeholder="Add any custom instructions for the model...",
elem_id="pos-custom-instructions"
)
# Submit button
submit_btn = gr.Button("Tag Text", variant="primary", elem_id="pos-submit-btn")
def select_all_tags():
    """Return a CheckboxGroup update that checks every available POS tag.

    The checkbox choices are the keys of ``POS_TAG_DESCRIPTIONS``, so
    "Select All" must select that full set; returning only
    ``DEFAULT_SELECTED_TAGS`` would merely reset the group to its defaults.
    """
    return gr.CheckboxGroup(value=list(POS_TAG_DESCRIPTIONS))
def clear_all_tags():
    """Return a CheckboxGroup update that leaves no POS tag checked."""
    nothing_selected = []
    return gr.CheckboxGroup(value=nothing_selected)
# Register the two tag shortcut buttons; both handlers take no inputs and
# write their result to the tag checkbox group.
for _tag_button, _tag_handler in (
    (select_all_btn, select_all_tags),
    (clear_all_btn, clear_all_tags),
):
    _tag_button.click(
        fn=_tag_handler,
        outputs=[tag_selection]
    )
# Right-hand column: tagging results, shown either as highlighted text or as
# a token/tag table.
with gr.Column(scale=3):
# Results container with tabs
with gr.Tabs() as output_tabs:
with gr.Tab("Tagged View", id="tagged-view"):
# Placeholder that is visible before any tagging run.
no_results_html = gr.HTML(
"
"
"Enter text and click 'Tag Text' to analyze.
",
visible=True
)
# Starts hidden; process_pos sends it the rendered markup (or a status
# message) with visible=True.
output_html = gr.HTML(
label="POS Tags",
elem_id="pos-output-html",
visible=False
)
with gr.Tab("Table View", id="table-view"):
# Placeholder for the table tab, visible before any tagging run.
no_results_table = gr.HTML(
""
"Enter text and click 'Tag Text' to analyze.
",
visible=True
)
# Read-only two-column table (Token, POS Tag); starts hidden and is made
# visible by process_pos when a run yields rows.
output_table = gr.Dataframe(
label="POS Tags",
headers=["Token", "POS Tag"],
datatype=["str", "str"],
interactive=False,
wrap=True,
elem_id="pos-output-table",
visible=False
)
# Add CSS for the POS tags (scoped to this component)
# NOTE(review): the payload below contains only a newline here -- presumably
# a <style> block belongs inside it; confirm.
gr.HTML("""
""")
def format_pos_result(result, selected_tags=None):
    """Render a POS tagging result as highlighted HTML plus a token table.

    Args:
        result: Mapping holding parallel "tokens" and "tags" sequences.
        selected_tags: Tags to highlight in the HTML and to keep in the
            table. ``None`` selects every key of ``POS_TAG_DESCRIPTIONS``.

    Returns:
        A ``(html_string, dataframe)`` tuple. The frame always carries the
        columns "Token" and "POS Tag", even when it has no rows.
    """
    import html

    columns = ["Token", "POS Tag"]

    # Relies on the module-level ``pd``. A function-local
    # ``import pandas as pd`` placed after this point would make ``pd`` a
    # local name and turn this early return into an UnboundLocalError.
    if not result or "tokens" not in result or "tags" not in result:
        return (
            "<div>No POS tags found or invalid result format.</div>",
            pd.DataFrame(columns=columns),
        )

    if selected_tags is None:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())

    pos_colors = {
        "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
        "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
        "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
        "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
        "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
        "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0",
    }

    # NOTE(review): the markup inside the original literals was not readable
    # (tags appear to have been stripped, leaving ``color`` unused). The
    # wrappers below are a minimal reconstruction using inline styles only;
    # confirm class names/styles against the component's CSS.
    html_parts = ["<div>"]
    rows = []
    for word, tag in zip(result["tokens"], result["tags"]):
        # Reduce tags such as "NOUN-PL" or "verb_past" to their base tag.
        clean_tag = str(tag).split("-")[0].split("_")[0].upper()
        if clean_tag not in STANDARD_POS_TAGS:
            clean_tag = "X"
        rows.append({"Token": word, "POS Tag": clean_tag})

        escaped_word = html.escape(str(word))
        if clean_tag not in selected_tags:
            # Unselected tags are shown as plain, unhighlighted text.
            html_parts.append(f"{escaped_word} ")
            continue

        color = pos_colors.get(clean_tag, "#f0f0f0")
        html_parts.append(f'<span style="background-color: {color};">')
        html_parts.append(f"{escaped_word} ")
        html_parts.append(f"<span>{clean_tag}</span>")
        html_parts.append("</span>")
    html_parts.append("</div>")

    # Passing ``columns`` keeps the "POS Tag" column present for an empty
    # token list, so the filter below cannot raise KeyError.
    df = pd.DataFrame(rows, columns=columns)
    df = df[df["POS Tag"].isin(selected_tags)].reset_index(drop=True)
    return "".join(html_parts), df
def process_pos(text: str, model: str, custom_instructions: str, selected_tags: list):
    """Tag ``text`` and stream UI updates for the result widgets.

    This is a generator: every yielded list holds four component updates
    that map, in order, onto the handler outputs ``output_html``,
    ``no_results_html``, ``output_table`` and ``no_results_table``.

    Args:
        text: Text to analyze; ``None`` or whitespace-only input is rejected
            with a message instead of being sent to the tagger.
        model: Model identifier passed to ``pos_tagging``.
        custom_instructions: Extra instructions, forwarded only when
            ``is_llm_model(model)`` is true.
        selected_tags: Tags to display; an empty selection falls back to
            every key of ``POS_TAG_DESCRIPTIONS``.

    Yields:
        Lists of four Gradio component updates (see above).
    """
    import html as html_lib
    import traceback

    def status(message_html):
        """Show one message in the first output and hide the other three."""
        return [
            gr.HTML(message_html, visible=True),  # output_html
            gr.HTML(visible=False),               # no_results_html
            gr.DataFrame(visible=False),          # output_table
            gr.HTML(visible=False),               # no_results_table
        ]

    # NOTE(review): the message literals in the original were truncated
    # (their markup appears stripped); the plain <div> wrappers used in this
    # function are a minimal reconstruction -- confirm the intended styling.

    # Because this function is a generator, the message has to be yielded;
    # a ``return [...]`` here would end the generator without ever
    # delivering the update to the UI.
    if not text or not text.strip():
        yield status("<div>Please enter some text to analyze.</div>")
        return

    use_llm = is_llm_model(model)
    if not selected_tags:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())

    try:
        yield status(
            "<div>Processing... This may take a moment for large texts.</div>"
        )

        result = pos_tagging(
            text=text,
            model=model,
            custom_instructions=custom_instructions if use_llm else "",
            use_llm=use_llm,
        )

        if "error" in result:
            error_msg = str(result["error"])
            if "API key" in error_msg or "authentication" in error_msg.lower():
                error_msg += " Please check your API key configuration."
            # The error text comes from the tagging backend and may contain
            # markup characters, so escape it before embedding it in HTML.
            yield status(f"<div>{html_lib.escape(error_msg)}</div>")
            return

        rendered_html, table = format_pos_result(result, selected_tags)
        if not table.empty:
            yield [
                gr.HTML(rendered_html, visible=True),     # output_html
                gr.HTML(visible=False),                   # no_results_html
                gr.DataFrame(value=table, visible=True),  # output_table
                gr.HTML(visible=False),                   # no_results_table
            ]
        else:
            empty_msg = "<div>No POS tags could be extracted from the text.</div>"
            yield [
                gr.HTML(empty_msg, visible=True),  # output_html
                gr.HTML(visible=False),            # no_results_html
                gr.DataFrame(visible=False),       # output_table
                gr.HTML(empty_msg, visible=True),  # no_results_table
            ]
    except Exception as e:
        # Top-level UI boundary: log the full traceback, show a generic
        # message to the user instead of leaking internals.
        error_msg = f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)  # Log the full error
        yield status(
            "<div>An error occurred while processing your request. "
            "Please try again.</div>"
        )
def update_ui(model_name: str) -> Dict:
    """Build the visibility update for the custom-instructions box.

    The box is shown only when ``is_llm_model`` reports that the chosen
    model is an LLM. The update is returned as a dict keyed by the
    ``custom_instructions`` component.
    """
    show_instructions = is_llm_model(model_name)
    instructions_update = gr.Textbox(visible=show_instructions)
    return {custom_instructions: instructions_update}
def clear_inputs():
    """Return three empty strings, one per text field to reset."""
    blank = ""
    return (blank, blank, blank)
# Re-evaluate the custom-instructions visibility whenever the model changes.
model_dropdown.change(
fn=update_ui,
inputs=[model_dropdown],
outputs=[custom_instructions]
)
# Run tagging on submit. process_pos yields its updates, and each yielded
# list maps positionally onto the four outputs listed here.
submit_btn.click(
fn=process_pos,
inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
outputs=[output_html, no_results_html, output_table, no_results_table],
show_progress=True
)
# NOTE(review): the payload below contains only a newline here -- presumably
# a <style> or <script> block belongs inside it; confirm.
gr.HTML("""
""")
# Initial visibility of the custom-instructions box follows the default model.
custom_instructions.visible = is_llm_model(DEFAULT_MODEL)
return None