space_22

Running

App Files Files Community

space_22 / app.py

Frenchizer

Update app.py

4235ba5 verified 17 days ago

raw

history blame

2.64 kB

	import gradio as gr
	from transformers import pipeline
	import spacy
	from textblob import TextBlob
	import json
	import requests

	# Initialize models once at import time so every request reuses the loaded instances.
	# spaCy pipeline; preprocess_text runs it over the input for named-entity recognition.
	nlp = spacy.load("en_core_web_sm") # Use "en_core_web_trf" if more accuracy is needed
	# Hugging Face text2text-generation pipeline; preprocess_text uses it as a second,
	# model-based spell checker alongside TextBlob.
	spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")

	def preprocess_and_forward(text: str) -> str:
	    """Preprocess ``text`` and forward it to the context-detection space.

	    The text is first analysed locally by ``preprocess_text``; the processed
	    text is then POSTed to the ``Frenchizer/space_9`` prediction endpoint.

	    Args:
	        text: Raw user input.

	    Returns:
	        A JSON-encoded string. On success it contains the keys
	        ``"preprocessing"`` and ``"context"``. On any failure while talking
	        to the remote endpoint it contains ``"error"`` and
	        ``"preprocessing_results"`` instead, so the local analysis is never
	        lost.
	    """
	    processed_text, preprocessing_results = preprocess_text(text)

	    try:
	        # Forward preprocessed text to context detection (space_9).
	        # requests applies no timeout by default; without one a stalled
	        # upstream would block this handler indefinitely.
	        response = requests.post(
	            "https://api.gradio.app/v2/Frenchizer/space_9/predict",
	            json={"data": [processed_text]},
	            timeout=30,
	        )

	        try:
	            context_response = response.json()
	        except ValueError:
	            # The body was not JSON (e.g. an HTML error page). Prefer
	            # reporting the HTTP status over an opaque decode error; if the
	            # status was fine, re-raise the decode error itself.
	            response.raise_for_status()
	            raise

	        if "error" in context_response:
	            return json.dumps({
	                "error": "Context detection failed",
	                "preprocessing_results": preprocessing_results,
	            })

	        context = context_response["data"][0]

	        # Return preprocessing and detected context
	        return json.dumps({
	            "preprocessing": preprocessing_results,
	            "context": context,
	        })

	    except Exception as e:
	        # UI boundary: report every failure (network, HTTP, malformed
	        # payload) as JSON rather than letting it crash the handler.
	        return json.dumps({
	            "error": str(e),
	            "preprocessing_results": preprocessing_results,
	        })

	def preprocess_text(text: str):
	    """Collect spelling suggestions, named entities and tags for ``text``.

	    Args:
	        text: Raw user input.

	    Returns:
	        A ``(text, result)`` tuple. ``text`` is the input returned unchanged
	        (corrections are reported, not applied). ``result`` is a dict with:

	        * ``"spell_suggestions"``: list of ``{"original", "corrected"}``
	          dicts, one per spell checker that proposed a different text.
	        * ``"entities"``: list of ``{"text", "label"}`` dicts from spaCy.
	        * ``"tags"``: list of ``#hashtag`` / ``@mention`` strings.
	    """
	    import re  # local import keeps this block self-contained

	    result = {
	        "spell_suggestions": [],
	        "entities": [],
	        "tags": [],
	    }

	    # Nothing to analyse: skip the spell checkers and the spaCy pipeline
	    # instead of running them on empty or whitespace-only input.
	    if not text or not text.strip():
	        return text, result

	    # Basic spell checking using TextBlob
	    corrected_text = str(TextBlob(text).correct())
	    if corrected_text != text:
	        result["spell_suggestions"].append({
	            "original": text,
	            "corrected": corrected_text,
	        })

	    # Transformer-based spell check; only reported when it differs from both
	    # the input and the TextBlob suggestion, to avoid duplicate entries.
	    spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
	    if spell_checked != text and spell_checked != corrected_text:
	        result["spell_suggestions"].append({
	            "original": text,
	            "corrected": spell_checked,
	        })

	    # NER with spaCy
	    doc = nlp(text)
	    result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]

	    # Extract tags (hashtags, mentions) from the raw text rather than from
	    # spaCy tokens: the tokenizer splits a leading '#' off as punctuation,
	    # so filtering tokens by prefix yields a bare "#" instead of "#topic".
	    # The lookbehind keeps e-mail addresses like a@b.com from matching.
	    result["tags"] = re.findall(r"(?<!\w)[#@]\w+", text)

	    return text, result

	# Gradio interface: one text input, one JSON output, and a button that
	# runs preprocess_and_forward on the input and shows its result.
	with gr.Blocks() as demo:
	    input_text = gr.Textbox(label="Input Text")
	    output_json = gr.JSON(label="Processing Results")
	    preprocess_button = gr.Button("Process")
	    preprocess_button.click(
	        fn=preprocess_and_forward,
	        inputs=[input_text],
	        outputs=[output_json],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()