import html
import json
import traceback
from typing import Dict, List

import gradio as gr
import pandas as pd

from tasks.pos_tagging import pos_tagging
from utils.ner_helpers import is_llm_model
from utils.pos_helpers import *
|
|
|
|
|
|
# Column order of the table shown in the "Table View" tab.
_OUTPUT_COLUMNS = ["Token", "POS Tag"]

# Inline background colour for each POS tag in the highlighted HTML view.
_POS_TAG_COLORS = {
    "NOUN": "#e3f2fd", "VERB": "#e8f5e9", "ADJ": "#fff8e1",
    "ADV": "#f3e5f5", "PRON": "#e8eaf6", "DET": "#e0f7fa",
    "ADP": "#f1f8e9", "CONJ": "#fce4ec", "CCONJ": "#fce4ec",
    "SCONJ": "#fce4ec", "NUM": "#e8f5e9", "PUNCT": "#f5f5f5",
    "X": "#fafafa", "SYM": "#fafafa", "PROPN": "#e1f5fe",
    "AUX": "#f3e5f5", "PART": "#f1f8e9", "INTJ": "#fff3e0",
}

# Stylesheet scoped to the tagged-view HTML output (elem_id "pos-output-html").
_POS_OUTPUT_CSS = """
<style>
#pos-output-html .pos-highlight {
    white-space: pre-wrap;
    line-height: 1.8;
    font-size: 14px;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 4px;
    background: #f9f9f9;
}
#pos-output-html .pos-token {
    display: inline-block;
    margin: 0 2px 4px 0;
    vertical-align: top;
    text-align: center;
}
#pos-output-html .token-text {
    display: block;
    padding: 2px 8px;
    background: #f0f4f8;
    border-radius: 4px 4px 0 0;
    border: 1px solid #dbe4ed;
    border-bottom: none;
    font-size: 0.9em;
}
#pos-output-html .pos-tag {
    display: block;
    padding: 2px 8px;
    border-radius: 0 0 4px 4px;
    font-size: 0.8em;
    font-family: 'Courier New', monospace;
    border: 1px solid;
    border-top: none;
}
/* Color coding for common POS tags */
#pos-output-html .NOUN { background-color: #e3f2fd; border-color: #bbdefb; color: #0d47a1; }
#pos-output-html .VERB { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; }
#pos-output-html .ADJ { background-color: #fff8e1; border-color: #ffecb3; color: #ff6f00; }
#pos-output-html .ADV { background-color: #f3e5f5; border-color: #e1bee7; color: #4a148c; }
#pos-output-html .PRON { background-color: #e8eaf6; border-color: #c5cae9; color: #1a237e; }
#pos-output-html .DET { background-color: #e0f7fa; border-color: #b2ebf2; color: #006064; }
#pos-output-html .ADP { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; }
#pos-output-html .CONJ, #pos-output-html .CCONJ, #pos-output-html .SCONJ { background-color: #fce4ec; border-color: #f8bbd0; color: #880e4f; }
#pos-output-html .NUM { background-color: #e8f5e9; border-color: #c8e6c9; color: #1b5e20; font-weight: bold; }
#pos-output-html .PUNCT { background-color: #f5f5f5; border-color: #e0e0e0; color: #424242; }
#pos-output-html .X, #pos-output-html .SYM { background-color: #fafafa; border-color: #f5f5f5; color: #616161; }
#pos-output-html .PROPN { background-color: #e1f5fe; border-color: #b3e5fc; color: #01579b; font-weight: bold; }
#pos-output-html .AUX { background-color: #f3e5f5; border-color: #e1bee7; color: #6a1b9a; }
#pos-output-html .PART { background-color: #f1f8e9; border-color: #dcedc8; color: #33691e; font-style: italic; }
#pos-output-html .INTJ { background-color: #fff3e0; border-color: #ffe0b2; color: #e65100; }
</style>
"""

# Stylesheet for the two output tabs and the tab navigation bar.
_POS_TABS_CSS = """
<style>
/* Style for the tabs */
#tagged-view, #table-view {
    padding: 15px;
}
/* Make the tabs more visible */
.tab-nav {
    margin-bottom: 10px;
    border-bottom: 1px solid #e0e0e0;
}
.tab-nav button {
    padding: 8px 16px;
    margin-right: 5px;
    border: 1px solid #e0e0e0;
    background: #f5f5f5;
    border-radius: 4px 4px 0 0;
    cursor: pointer;
}
.tab-nav button.selected {
    background: #ffffff;
    border-bottom: 2px solid #0e7490;
    font-weight: bold;
}
</style>
"""


def format_pos_result(result, selected_tags=None):
    """Render a tagging result as highlighted HTML plus a filtered table.

    Args:
        result: Mapping with parallel ``"tokens"`` and ``"tags"`` sequences.
            Anything falsy, or missing either key, is treated as invalid.
        selected_tags: Tags to highlight in the HTML and keep in the table.
            ``None`` means every key of ``POS_TAG_DESCRIPTIONS``.

    Returns:
        ``(html_string, dataframe)``. The HTML contains every token, with
        only the selected tags highlighted; the dataframe (columns
        ``Token`` / ``POS Tag``) contains only rows whose tag is selected.
        For an invalid ``result`` a placeholder message and an empty
        dataframe with the same columns are returned.
    """
    if not result or "tokens" not in result or "tags" not in result:
        return (
            "<div style='text-align: center; color: #666; padding: 20px;'>"
            "No POS tags found or invalid result format.</div>",
            pd.DataFrame(columns=_OUTPUT_COLUMNS),
        )

    if selected_tags is None:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())
    selected = set(selected_tags)

    html_parts = [
        '<div style="line-height:1.6;padding:15px;border:1px solid #e0e0e0;'
        'border-radius:4px;background:#f9f9f9;white-space:pre-wrap;">'
    ]
    rows = []

    for word, tag in zip(result["tokens"], result["tags"]):
        # Reduce tags such as "NOUN-xyz" or "verb_abc" to their leading part;
        # anything outside the standard tag set is bucketed under "X".
        clean_tag = str(tag).split("-")[0].split("_")[0].upper()
        if clean_tag not in STANDARD_POS_TAGS:
            clean_tag = "X"

        rows.append({"Token": word, "POS Tag": clean_tag})
        token_html = html.escape(str(word))

        if clean_tag not in selected:
            # Unselected tokens stay in the text flow, just without styling.
            html_parts.append(f"{token_html} ")
            continue

        color = _POS_TAG_COLORS.get(clean_tag, "#f0f0f0")
        html_parts.append(
            f'<span style="background:{color};border-radius:3px;padding:0 2px;'
            f'margin:0 1px;border:1px solid rgba(0,0,0,0.1);">'
        )
        html_parts.append(f"{token_html} ")
        html_parts.append(
            '<span style="font-size:0.7em;font-weight:bold;color:#555;'
            'border-radius:2px;padding:0 2px;background:rgba(255,255,255,0.7);">'
            f"{clean_tag}</span>"
        )
        html_parts.append("</span>")

    html_parts.append("</div>")

    # Explicit columns keep the frame well-formed even when there are no rows.
    df = pd.DataFrame(rows, columns=_OUTPUT_COLUMNS)
    df = df[df["POS Tag"].isin(selected_tags)].reset_index(drop=True)
    return "".join(html_parts), df


def _message_only(message_html: str) -> list:
    """Build an update that shows one message and hides the other outputs.

    The list order matches the ``outputs`` wired to the submit button in
    ``pos_ui``: tagged HTML, tagged-view placeholder, table, table placeholder.
    """
    return [
        gr.HTML(message_html, visible=True),
        gr.HTML(visible=False),
        gr.Dataframe(visible=False),
        gr.HTML(visible=False),
    ]


def process_pos(text: str, model: str, custom_instructions: str, selected_tags: list):
    """Run POS tagging and stream UI updates for the four output components.

    This is a generator: each yielded list updates, in order, the tagged HTML,
    the tagged-view placeholder, the table, and the table placeholder.

    Args:
        text: Text to tag. Blank or ``None`` input yields a prompt message.
        model: Model identifier; ``is_llm_model`` decides whether it is an LLM.
        custom_instructions: Extra instructions, forwarded only for LLM models.
        selected_tags: Tags to display; an empty selection means all tags.
    """
    if not text or not text.strip():
        # Must be yielded: a value passed to ``return`` in a generator is
        # never delivered as an update.
        yield _message_only(
            "<div style='color: #f44336; padding: 20px;'>"
            "Please enter some text to analyze.</div>"
        )
        return

    use_llm = is_llm_model(model)
    if not selected_tags:
        selected_tags = list(POS_TAG_DESCRIPTIONS.keys())

    try:
        yield _message_only(
            "<div class='pos-highlight'>Processing... "
            "This may take a moment for large texts.</div>"
        )

        result = pos_tagging(
            text=text,
            model=model,
            custom_instructions=custom_instructions if use_llm else "",
            use_llm=use_llm,
        )

        if "error" in result:
            error_msg = str(result["error"])
            if "API key" in error_msg or "authentication" in error_msg.lower():
                error_msg += " Please check your API key configuration."
            # The message originates from the backend; escape it before
            # embedding it in markup.
            yield _message_only(
                f"<div style='color: #d32f2f; padding: 20px;'>"
                f"{html.escape(error_msg)}</div>"
            )
            return

        tagged_html, table = format_pos_result(result, selected_tags)

        if not table.empty:
            yield [
                gr.HTML(tagged_html, visible=True),
                gr.HTML(visible=False),
                gr.Dataframe(value=table, visible=True),
                gr.HTML(visible=False),
            ]
        else:
            empty_msg = (
                "<div class='pos-highlight' style='text-align: center; "
                "color: #666; padding: 20px;'>"
                "No POS tags could be extracted from the text.</div>"
            )
            yield [
                gr.HTML(empty_msg, visible=True),
                gr.HTML(visible=False),
                gr.Dataframe(visible=False),
                gr.HTML(empty_msg, visible=True),
            ]

    except Exception as e:
        # UI boundary: print the full traceback, show the user a generic message.
        error_msg = f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)
        yield _message_only(
            "<div class='pos-highlight' style='color: #d32f2f; padding: 20px;'>"
            "An error occurred while processing your request. "
            "Please try again.</div>"
        )


def pos_ui() -> None:
    """Build the part-of-speech tagging panel and wire its event handlers.

    Creates the input column (text box, examples, tag filter, model picker,
    optional custom instructions, submit button) and the output column
    (tagged HTML view and table view), then connects the buttons and the
    model dropdown to their callbacks.

    NOTE(review): the source this was restored from had lost its indentation,
    so the exact nesting of the layout containers is reconstructed — confirm
    against the intended layout.
    """
    all_tags = list(POS_TAG_DESCRIPTIONS.keys())
    placeholder_html = (
        "<div style='text-align: center; color: #666; padding: 20px;'>"
        "Enter text and click 'Tag Text' to analyze.</div>"
    )

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                lines=8,
                placeholder="Enter text to analyze for part-of-speech tags...",
                elem_id="pos-input-text",
            )
            gr.Examples(
                examples=[
                    ["The cat is sitting on the mat."],
                    ["She quickly finished her homework before dinner."],
                ],
                inputs=[input_text],
                label="Examples",
            )

            with gr.Group():
                tag_selection = gr.CheckboxGroup(
                    label="POS Tags to Display",
                    choices=all_tags,
                    value=DEFAULT_SELECTED_TAGS,
                    interactive=True,
                )
                with gr.Row():
                    select_all_btn = gr.Button("Select All", size="sm")
                    clear_all_btn = gr.Button("Clear All", size="sm")

            with gr.Row():
                model_dropdown = gr.Dropdown(
                    POS_MODELS,
                    value=DEFAULT_MODEL,
                    label="Model",
                    interactive=True,
                    elem_id="pos-model-dropdown",
                )
                custom_instructions = gr.Textbox(
                    label="Custom Instructions (optional)",
                    lines=2,
                    placeholder="Add any custom instructions for the model...",
                    elem_id="pos-custom-instructions",
                    # Only LLM models accept instructions; start in the state
                    # that matches the default model.
                    visible=is_llm_model(DEFAULT_MODEL),
                )

            submit_btn = gr.Button(
                "Tag Text", variant="primary", elem_id="pos-submit-btn"
            )

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab("Tagged View", id="tagged-view"):
                    no_results_html = gr.HTML(placeholder_html, visible=True)
                    output_html = gr.HTML(
                        label="POS Tags",
                        elem_id="pos-output-html",
                        visible=False,
                    )
                with gr.Tab("Table View", id="table-view"):
                    no_results_table = gr.HTML(placeholder_html, visible=True)
                    output_table = gr.Dataframe(
                        label="POS Tags",
                        headers=["Token", "POS Tag"],
                        datatype=["str", "str"],
                        interactive=False,
                        wrap=True,
                        elem_id="pos-output-table",
                        visible=False,
                    )

            gr.HTML(_POS_OUTPUT_CSS)

    def select_all_tags():
        """Tick every available POS tag."""
        return gr.CheckboxGroup(value=list(all_tags))

    def clear_all_tags():
        """Untick every POS tag."""
        return gr.CheckboxGroup(value=[])

    def update_ui(model_name: str) -> Dict:
        """Show the custom-instructions box only for LLM models."""
        use_llm = is_llm_model(model_name)
        return {custom_instructions: gr.Textbox(visible=use_llm)}

    def clear_inputs():
        """Return three empty strings.

        NOTE(review): not wired to any component in this function.
        """
        return "", "", ""

    select_all_btn.click(fn=select_all_tags, outputs=[tag_selection])
    clear_all_btn.click(fn=clear_all_tags, outputs=[tag_selection])

    model_dropdown.change(
        fn=update_ui,
        inputs=[model_dropdown],
        outputs=[custom_instructions],
    )

    submit_btn.click(
        fn=process_pos,
        inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
        outputs=[output_html, no_results_html, output_table, no_results_table],
        show_progress=True,
    )

    gr.HTML(_POS_TABS_CSS)

    return None
|