import html
import json
import traceback
from typing import Dict, List

import gradio as gr
import pandas as pd

from tasks.pos_tagging import pos_tagging
from utils.ner_helpers import is_llm_model
from utils.pos_helpers import *


# POS UI
def pos_ui():
    """Build the part-of-speech tagging UI and wire up its event handlers.

    Creates the input column (text box, examples, tag filter, model picker,
    custom instructions, submit button) and the output column (a "Tagged
    View" tab and a "Table View" tab), then registers the click/change
    callbacks defined below.

    Presumably meant to be called from inside an active ``gr.Blocks``
    context, since it creates components without opening one itself.

    NOTE(review): several string literals in this function (status
    messages, the CSS/HTML payloads) begin and end with a bare newline and
    contain no markup; it looks like their HTML wrappers were lost at some
    point. They are kept as found -- restore the markup if it is available.

    Returns:
        None. The components are registered with the surrounding layout as
        a side effect.
    """
    placeholder_msg = "\nEnter text and click 'Tag Text' to analyze.\n"
    all_tags = list(POS_TAG_DESCRIPTIONS.keys())

    # UI Components
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                lines=8,
                placeholder="Enter text to analyze for part-of-speech tags...",
                elem_id="pos-input-text",
            )
            gr.Examples(
                examples=[
                    ["The cat is sitting on the mat."],
                    ["She quickly finished her homework before dinner."],
                ],
                inputs=[input_text],
                label="Examples",
            )

            # Tag selection
            with gr.Group():
                tag_selection = gr.CheckboxGroup(
                    label="POS Tags to Display",
                    choices=all_tags,
                    value=DEFAULT_SELECTED_TAGS,
                    interactive=True,
                )
                with gr.Row():
                    select_all_btn = gr.Button("Select All", size="sm")
                    clear_all_btn = gr.Button("Clear All", size="sm")

            # Model selection at the bottom
            with gr.Row():
                model_dropdown = gr.Dropdown(
                    POS_MODELS,
                    value=DEFAULT_MODEL,
                    label="Model",
                    interactive=True,
                    elem_id="pos-model-dropdown",
                )
                custom_instructions = gr.Textbox(
                    label="Custom Instructions (optional)",
                    lines=2,
                    placeholder="Add any custom instructions for the model...",
                    elem_id="pos-custom-instructions",
                    # Only LLM-backed models accept free-form instructions.
                    visible=is_llm_model(DEFAULT_MODEL),
                )

            # Submit button
            submit_btn = gr.Button(
                "Tag Text", variant="primary", elem_id="pos-submit-btn"
            )

            # Button event handlers
            def select_all_tags():
                """Tick every available POS tag in the filter."""
                return gr.CheckboxGroup(value=all_tags)

            def clear_all_tags():
                """Untick every POS tag in the filter."""
                return gr.CheckboxGroup(value=[])

            select_all_btn.click(fn=select_all_tags, outputs=[tag_selection])
            clear_all_btn.click(fn=clear_all_tags, outputs=[tag_selection])

        with gr.Column(scale=3):
            # Results container with tabs
            with gr.Tabs() as output_tabs:
                with gr.Tab("Tagged View", id="tagged-view"):
                    no_results_html = gr.HTML(placeholder_msg, visible=True)
                    output_html = gr.HTML(
                        label="POS Tags",
                        elem_id="pos-output-html",
                        visible=False,
                    )
                with gr.Tab("Table View", id="table-view"):
                    no_results_table = gr.HTML(placeholder_msg, visible=True)
                    output_table = gr.Dataframe(
                        label="POS Tags",
                        headers=["Token", "POS Tag"],
                        datatype=["str", "str"],
                        interactive=False,
                        wrap=True,
                        elem_id="pos-output-table",
                        visible=False,
                    )

    # Add CSS for the POS tags (scoped to this component)
    # NOTE(review): the payload is blank here -- the stylesheet seems to be
    # missing; confirm against the intended CSS.
    gr.HTML(""" """)

    # Background colour per tag, used when highlighting tokens.
    tag_colors = {
        "NOUN": "#e3f2fd",
        "VERB": "#e8f5e9",
        "ADJ": "#fff8e1",
        "ADV": "#f3e5f5",
        "PRON": "#e8eaf6",
        "DET": "#e0f7fa",
        "ADP": "#f1f8e9",
        "CONJ": "#fce4ec",
        "CCONJ": "#fce4ec",
        "SCONJ": "#fce4ec",
        "NUM": "#e8f5e9",
        "PUNCT": "#f5f5f5",
        "X": "#fafafa",
        "SYM": "#fafafa",
        "PROPN": "#e1f5fe",
        "AUX": "#f3e5f5",
        "PART": "#f1f8e9",
        "INTJ": "#fff3e0",
    }

    def format_pos_result(result, selected_tags=None):
        """Render a tagging result as highlighted HTML plus a table.

        Args:
            result: Mapping with parallel ``"tokens"`` and ``"tags"``
                sequences.
            selected_tags: Tags to highlight and to keep in the table.
                ``None`` means every key of ``POS_TAG_DESCRIPTIONS``.

        Returns:
            ``(html_string, dataframe)``. The HTML contains every token;
            tokens whose tag is selected are wrapped in a coloured span with
            the tag appended. The dataframe has ``Token`` / ``POS Tag``
            columns and only rows whose tag is selected. If *result* is
            empty or lacks either key, a fallback message and an empty
            dataframe are returned.
        """
        columns = ["Token", "POS Tag"]
        if not result or "tokens" not in result or "tags" not in result:
            return (
                "\nNo POS tags found or invalid result format.\n",
                pd.DataFrame(columns=columns),
            )

        if selected_tags is None:
            selected_tags = all_tags
        wanted = set(selected_tags)

        html_parts = ["\n"]
        rows = []
        for word, tag in zip(result["tokens"], result["tags"]):
            # Drop any "-suffix" / "_suffix" and normalise case; anything
            # outside the standard tag set is reported as "X".
            clean_tag = tag.split("-")[0].split("_")[0].upper()
            if clean_tag not in STANDARD_POS_TAGS:
                clean_tag = "X"
            rows.append({"Token": word, "POS Tag": clean_tag})

            if clean_tag not in wanted:
                html_parts.append(f"{html.escape(word)} ")
                continue

            color = tag_colors.get(clean_tag, "#f0f0f0")
            # The colour was looked up but never applied (the surrounding
            # format strings were empty); apply it with a minimal inline
            # style. NOTE(review): swap in the project's own markup/classes
            # if they exist.
            html_parts.append(
                f'<span style="background-color: {color}">'
                f"{html.escape(word)} {clean_tag}</span> "
            )
        html_parts.append("\n")

        # Passing ``columns`` keeps the frame well-formed even when there
        # are no tokens, so the filter below cannot hit a missing column.
        df = pd.DataFrame(rows, columns=columns)
        df = df[df["POS Tag"].isin(wanted)].reset_index(drop=True)
        return "".join(html_parts), df

    def _message_only(message: str) -> List:
        """Show *message* in the tagged view and hide the other outputs."""
        return [
            gr.HTML(message, visible=True),  # output_html
            gr.HTML(visible=False),  # no_results_html
            gr.DataFrame(visible=False),  # output_table
            gr.HTML(visible=False),  # no_results_table
        ]

    def process_pos(
        text: str, model: str, custom_instructions: str, selected_tags: list
    ):
        """Tag *text* and stream UI updates for the four output components.

        This is a generator: it first yields a "Processing..." state, then
        the final state (results, an error message, or an empty-result
        message). Each yielded list is ordered as
        ``[output_html, no_results_html, output_table, no_results_table]``.

        Args:
            text: Text to analyse.
            model: Model identifier passed to ``pos_tagging``.
            custom_instructions: Extra instructions; only forwarded when
                ``is_llm_model(model)`` is true.
            selected_tags: Tags to display; an empty selection means all.
        """
        if not text or not text.strip():
            # A generator discards the value of ``return``; the message must
            # be yielded for the UI to receive it.
            yield _message_only("\nPlease enter some text to analyze.\n")
            return

        use_llm = is_llm_model(model)
        if not selected_tags:
            selected_tags = all_tags

        try:
            yield _message_only(
                "\nProcessing... This may take a moment for large texts.\n"
            )

            result = pos_tagging(
                text=text,
                model=model,
                custom_instructions=custom_instructions if use_llm else "",
                use_llm=use_llm,
            )

            if "error" in result:
                error_msg = result["error"]
                if "API key" in error_msg or "authentication" in error_msg.lower():
                    error_msg += " Please check your API key configuration."
                # Escape: the message comes from the backend and is inserted
                # into an HTML component.
                yield _message_only(f"\n{html.escape(error_msg)}\n")
                return

            tagged_html, table = format_pos_result(result, selected_tags)
            if not table.empty:
                yield [
                    gr.HTML(tagged_html, visible=True),  # output_html
                    gr.HTML(visible=False),  # no_results_html
                    gr.DataFrame(value=table, visible=True),  # output_table
                    gr.HTML(visible=False),  # no_results_table
                ]
            else:
                empty_msg = "\nNo POS tags could be extracted from the text.\n"
                yield [
                    gr.HTML(empty_msg, visible=True),  # output_html
                    gr.HTML(visible=False),  # no_results_html
                    gr.DataFrame(visible=False),  # output_table
                    gr.HTML(empty_msg, visible=True),  # no_results_table
                ]
        except Exception as e:
            # UI boundary: log the full traceback, show a generic message.
            error_msg = (
                f"Error processing request: {str(e)}\n\n{traceback.format_exc()}"
            )
            print(error_msg)  # Log the full error
            yield _message_only(
                "\nAn error occurred while processing your request. "
                "Please try again.\n"
            )

    def update_ui(model_name: str) -> Dict:
        """Show the custom-instructions box only for LLM-backed models."""
        use_llm = is_llm_model(model_name)
        return {custom_instructions: gr.Textbox(visible=use_llm)}

    def clear_inputs():
        """Return three empty strings (not wired to any event here)."""
        return "", "", ""

    model_dropdown.change(
        fn=update_ui,
        inputs=[model_dropdown],
        outputs=[custom_instructions],
    )

    submit_btn.click(
        fn=process_pos,
        inputs=[input_text, model_dropdown, custom_instructions, tag_selection],
        outputs=[output_html, no_results_html, output_table, no_results_table],
        show_progress=True,
    )

    # NOTE(review): blank payload, kept as found -- purpose unclear from here.
    gr.HTML(""" """)

    return None