# Hugging Face Space (status: Running)
import json
import re

import gradio as gr
import requests
import spacy
from textblob import TextBlob
from transformers import pipeline
# Initialize models once at import time so every request reuses them.
# spaCy pipeline used for named-entity recognition
# (use "en_core_web_trf" if more accuracy is needed).
nlp = spacy.load(name="en_core_web_sm")

# Transformer-based English spelling corrector.
spell_checker = pipeline(
    task="text2text-generation",
    model="oliverguhr/spelling-correction-english-base",
)
def preprocess_and_forward(text: str, timeout: float = 30.0) -> str:
    """Preprocess *text* and forward it to the context-detection Space.

    Args:
        text: Raw user input.
        timeout: Seconds to wait for the context-detection service before
            giving up. Without a timeout ``requests`` waits indefinitely,
            which would hang the UI handler if the service is unresponsive.

    Returns:
        A JSON string. On success it contains ``"preprocessing"`` and
        ``"context"``; on failure it contains ``"error"`` and
        ``"preprocessing_results"`` so the local results are never lost.
    """
    processed_text, preprocessing_results = preprocess_text(text)

    try:
        # Forward preprocessed text to context detection (space_9).
        response = requests.post(
            "https://api.gradio.app/v2/Frenchizer/space_9/predict",
            json={"data": [processed_text]},
            timeout=timeout,
        )
        context_response = response.json()

        if "error" in context_response:
            return json.dumps({
                "error": "Context detection failed",
                "preprocessing_results": preprocessing_results,
            })

        context = context_response["data"][0]
    except Exception as e:
        # Deliberate catch-all at the UI boundary: network failures, timeouts,
        # non-JSON bodies and unexpected payload shapes are all reported to
        # the caller as JSON instead of raising.
        return json.dumps({
            "error": str(e),
            "preprocessing_results": preprocessing_results,
        })

    # Return preprocessing and detected context.
    return json.dumps({
        "preprocessing": preprocessing_results,
        "context": context,
    })
# A '#' or '@' immediately followed by word characters, and not itself
# preceded by a word character (so "user@example.com" is not a mention).
_TAG_PATTERN = re.compile(r"(?<!\w)[#@]\w+")


def preprocess_text(text: str) -> tuple:
    """Run spell checking, NER and tag extraction on *text*.

    Args:
        text: Raw input text.

    Returns:
        A ``(text, result)`` tuple. ``text`` is the input, returned unchanged.
        ``result`` is a dict with three keys:

        * ``"spell_suggestions"``: list of ``{"original", "corrected"}`` dicts,
          one per checker whose output differs from the input.
        * ``"entities"``: list of ``{"text", "label"}`` dicts from spaCy NER.
        * ``"tags"``: list of hashtags / mentions found in the text.
    """
    result = {
        "spell_suggestions": [],
        "entities": [],
        "tags": [],
    }

    # Nothing to analyse: skip the models entirely. A seq2seq spell checker
    # may still generate text for an empty prompt, which would otherwise be
    # reported as a bogus suggestion.
    if not text or not text.strip():
        return text, result

    # Basic spell checking using TextBlob.
    corrected_text = str(TextBlob(text).correct())
    if corrected_text != text:
        result["spell_suggestions"].append({
            "original": text,
            "corrected": corrected_text,
        })

    # Transformer-based spell check; only reported when it adds something
    # the TextBlob suggestion did not already cover.
    spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
    if spell_checked != text and spell_checked != corrected_text:
        result["spell_suggestions"].append({
            "original": text,
            "corrected": spell_checked,
        })

    # NER with spaCy.
    doc = nlp(text)
    result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]

    # Extract potential tags (hashtags, mentions) from the raw text rather
    # than from spaCy tokens: the English tokenizer splits '#' off as its own
    # prefix token, so a token-based check yields a bare "#" instead of
    # "#tag".
    result["tags"] = _TAG_PATTERN.findall(text)

    return text, result
# Gradio interface: one text box in, one JSON viewer out, wired to a button.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    output_json = gr.JSON(label="Processing Results")
    preprocess_button = gr.Button(value="Process")

    # Clicking the button runs the full preprocess-and-forward pipeline.
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text],
        outputs=[output_json],
    )


# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()