Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,65 +5,61 @@ from textblob import TextBlob
|
|
5 |
from gradio_client import Client
|
6 |
|
7 |
# Initialize models
|
8 |
-
nlp = spacy.load("en_core_web_sm")
|
9 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
10 |
|
11 |
def preprocess_text(text: str):
|
12 |
-
"""
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
token_suggestions = {"original": token, "suggestions": []}
|
21 |
-
|
22 |
-
# Basic spell checking
|
23 |
-
corrected = str(TextBlob(token).correct())
|
24 |
-
if corrected != token:
|
25 |
-
token_suggestions["suggestions"].append(corrected)
|
26 |
-
|
27 |
-
# Transformer-based spell checking
|
28 |
-
spell_checked = spell_checker(token, max_length=20)[0]['generated_text']
|
29 |
-
if spell_checked != token and spell_checked not in token_suggestions["suggestions"]:
|
30 |
-
token_suggestions["suggestions"].append(spell_checked)
|
31 |
-
|
32 |
-
suggestions.append(token_suggestions)
|
33 |
-
|
34 |
-
# Named Entity Recognition (NER)
|
35 |
doc = nlp(text)
|
36 |
-
entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
|
37 |
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
"""
|
42 |
-
|
43 |
-
"""
|
44 |
-
processed_data = preprocess_text(text)
|
45 |
-
final_text = " ".join([t['suggestions'][0] if t['suggestions'] else t['original'] for t in processed_data["tokens"]])
|
46 |
-
translation = forward_to_translation(final_text)
|
47 |
-
return processed_data, translation # Unpacking dictionary values separately
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
"""
|
|
|
|
|
|
|
53 |
client = Client("Frenchizer/space_17")
|
54 |
try:
|
55 |
-
|
|
|
56 |
except Exception as e:
|
57 |
-
return f"Error: {str(e)}"
|
58 |
|
59 |
# Gradio interface
|
60 |
with gr.Blocks() as demo:
|
61 |
input_text = gr.Textbox(label="Input Text")
|
62 |
-
output_text = gr.Textbox(label="
|
63 |
-
suggestion_output = gr.JSON(label="Suggestions")
|
64 |
-
|
65 |
preprocess_button = gr.Button("Process")
|
66 |
-
preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[
|
67 |
|
68 |
if __name__ == "__main__":
|
69 |
demo.launch()
|
|
|
5 |
from gradio_client import Client
|
6 |
|
7 |
# Load the shared NLP models once, at import time.
# spaCy pipeline used for tokenisation and named-entity recognition.
nlp = spacy.load("en_core_web_sm")
# Seq2seq spelling-correction model served through a transformers pipeline.
spell_checker = pipeline(
    task="text2text-generation",
    model="oliverguhr/spelling-correction-english-base",
)
|
10 |
|
11 |
def preprocess_text(text: str):
    """Collect spelling suggestions, named entities and tags for *text*.

    Two independent spell checkers are run over the whole input: TextBlob's
    ``correct()`` and the transformer ``spell_checker`` pipeline. Each one
    contributes at most one whole-text suggestion; no per-token positions
    are recorded.

    Args:
        text: Raw user input.

    Returns:
        A ``(text, result)`` tuple. ``text`` is the unmodified input and
        ``result`` is a dict with the keys ``"spell_suggestions"`` (list of
        ``{"original", "corrected"}`` dicts), ``"entities"`` (list of
        ``{"text", "label"}`` dicts) and ``"tags"`` (list of token strings
        that start with ``#`` or ``@``).
    """
    result = {
        "spell_suggestions": [],
        "entities": [],
        "tags": [],
    }

    # Nothing to analyse: return the empty result without invoking any model,
    # so blank input cannot yield a made-up "correction".
    if not text or not text.strip():
        return text, result

    doc = nlp(text)

    # TextBlob spell check over the whole text.
    corrected = str(TextBlob(text).correct())
    if corrected != text:
        result["spell_suggestions"].append({
            "original": text,
            "corrected": corrected,
        })

    # Transformer spell check; only recorded when it differs from both the
    # input and the TextBlob suggestion, to avoid duplicate entries.
    spell_checked = spell_checker(text, max_length=512)[0]["generated_text"]
    if spell_checked not in (text, corrected):
        result["spell_suggestions"].append({
            "original": text,
            "corrected": spell_checked,
        })

    # Entities and tags come from the spaCy document.
    result["entities"] = [
        {"text": ent.text, "label": ent.label_} for ent in doc.ents
    ]
    # NOTE(review): this relies on the tokenizer keeping '#'/'@' attached to
    # the following word - confirm against the loaded spaCy model.
    result["tags"] = [
        token.text for token in doc if token.text.startswith(("#", "@"))
    ]

    return text, result
|
44 |
+
|
45 |
+
def preprocess_and_forward(text: str):
    """Preprocess *text* and forward the original text for translation.

    Args:
        text: Raw user input.

    Returns:
        A two-element list ``[translation, preprocessing_result]``. When the
        translation client cannot be created or the call fails, the first
        element is an ``"Error: ..."`` message instead of the translation.
        ``preprocessing_result`` is the dict produced by ``preprocess_text``.
    """
    original_text, preprocessing_result = preprocess_text(text)

    # Forward the original (uncorrected) text to the translation service.
    try:
        # Creating the client is kept inside the try block so that a failure
        # to set it up is reported the same way as a failed prediction.
        client = Client("Frenchizer/space_17")
        translation = client.predict(original_text)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user as text.
        return [f"Error: {str(e)}", preprocessing_result]
    return [translation, preprocessing_result]
|
56 |
|
57 |
# Gradio interface
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Text")
    output_text = gr.Textbox(label="Output Text")
    # Shows the preprocessing dict (spell suggestions, entities, tags).
    suggestion_output = gr.JSON(label="Preprocessing Result")
    preprocess_button = gr.Button("Process")
    # preprocess_and_forward returns two values, so two output components are
    # wired: the translation text and the preprocessing result.
    preprocess_button.click(
        fn=preprocess_and_forward,
        inputs=[input_text],
        outputs=[output_text, suggestion_output],
    )

if __name__ == "__main__":
    demo.launch()
|