import json
from typing import Any, Dict, Tuple

import gradio as gr
import language_tool_python
import requests
import spacy
from transformers import pipeline

# Endpoint of the downstream context-detection Space (space_9).
CONTEXT_DETECTION_URL = "https://api.gradio.app/v2/Frenchizer/space_9/predict"

# Upper bound (seconds) on the context-detection request. `requests` applies
# no timeout unless one is passed, so without this a stalled upstream would
# block the Gradio worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Initialize models and tools
nlp = spacy.load("en_core_web_sm")
language_tool = language_tool_python.LanguageTool('en-US')
spell_checker = pipeline(
    "text2text-generation",
    model="oliverguhr/spelling-correction-english-base",
)


def _error_response(message: str, preprocessing_results: Dict[str, Any]) -> str:
    """Serialize a failure payload that still carries the preprocessing results."""
    return json.dumps({
        "error": message,
        "preprocessing_results": preprocessing_results,
    })


def preprocess_and_forward(text: str) -> str:
    """Preprocess *text* locally, then ask the context-detection Space for its context.

    Args:
        text: Raw user input from the Gradio textbox.

    Returns:
        A JSON string. On success it has the keys ``"preprocessing"`` (the
        dict produced by :func:`preprocess_text`) and ``"context"`` (the first
        element of the remote response's ``"data"`` list). On any failure of
        the remote call it has the keys ``"error"`` and
        ``"preprocessing_results"`` instead, so the local results are never
        lost.
    """
    processed_text, preprocessing_results = preprocess_text(text)

    # Forward preprocessed text to context detection (space_9).
    try:
        response = requests.post(
            CONTEXT_DETECTION_URL,
            json={"data": [processed_text]},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # A non-JSON body raises ValueError (or a subclass of it).
        context_response = response.json()
    except (requests.RequestException, ValueError) as exc:
        return _error_response(str(exc), preprocessing_results)

    # Anything that is not a JSON object, or that reports an error itself,
    # counts as a failed detection.
    if not isinstance(context_response, dict) or "error" in context_response:
        return _error_response("Context detection failed", preprocessing_results)

    try:
        context = context_response["data"][0]
    except (KeyError, IndexError, TypeError):
        # "data" missing, empty, or not indexable.
        return _error_response(
            "Context detection returned an unexpected payload",
            preprocessing_results,
        )

    # Return preprocessing and detected context.
    return json.dumps({
        "preprocessing": preprocessing_results,
        "context": context,
    })


def preprocess_text(text: str) -> Tuple[str, Dict[str, Any]]:
    """Run the local analysis passes over *text*.

    The passes are: LanguageTool checks, the transformer spelling model,
    spaCy named-entity recognition, and ``#``/``@`` token extraction.

    Args:
        text: The text to analyse.

    Returns:
        A ``(text, result)`` tuple. ``text`` is the input returned unchanged;
        suggested corrections are only reported, never applied. ``result``
        has four keys:

        * ``"corrections"``: one ``{"original", "suggestion"}`` dict per
          LanguageTool match that offers at least one replacement, using the
          first replacement.
        * ``"spell_suggestions"``: a single ``{"original", "corrected"}`` dict
          when the spelling model's output differs from the input, else empty.
        * ``"entities"``: ``{"text", "label"}`` dicts for each spaCy entity.
        * ``"tags"``: texts of tokens that start with ``#`` or ``@``.
    """
    result: Dict[str, Any] = {
        "corrections": [],
        "entities": [],
        "tags": [],
        "spell_suggestions": [],
    }

    # Spell checking: keep only matches that actually propose a replacement.
    for match in language_tool.check(text):
        if not match.replacements:
            continue
        start = match.offsetInContext
        result["corrections"].append({
            # The flagged span is sliced out of the match's context snippet.
            "original": match.context[start:start + match.errorLength],
            "suggestion": match.replacements[0],
        })

    # Transformer-based spell check over the whole input.
    spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
    if spell_checked != text:
        result["spell_suggestions"].append({
            "original": text,
            "corrected": spell_checked,
        })

    # NER with spaCy
    doc = nlp(text)
    result["entities"] = [
        {"text": ent.text, "label": ent.label_} for ent in doc.ents
    ]

    # Extract potential tags.
    # NOTE(review): this depends on how the spaCy tokenizer treats a leading
    # '#'/'@'. If it splits the symbol from the word, only the bare symbol is
    # captured here. Confirm against real input.
    result["tags"] = [
        token.text for token in doc if token.text.startswith(('#', '@'))
    ]

    return text, result


# Gradio interface
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    output_json = gr.JSON(label="Processing Results")
    preprocess_button = gr.Button("Process")
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text],
        outputs=[output_json],
    )

if __name__ == "__main__":
    demo.launch()