Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,19 +3,46 @@ from transformers import pipeline
|
|
3 |
import spacy
|
4 |
from textblob import TextBlob
|
5 |
from gradio_client import Client
|
|
|
6 |
|
7 |
# Initialize models
|
8 |
nlp = spacy.load("en_core_web_sm")
|
9 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def preprocess_text(text: str):
|
12 |
-
"""Process text and return corrections with position information"""
|
13 |
result = {
|
14 |
"spell_suggestions": [],
|
15 |
"entities": [],
|
16 |
"tags": []
|
17 |
}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Find and record positions of corrections
|
20 |
doc = nlp(text)
|
21 |
|
@@ -43,7 +70,7 @@ def preprocess_text(text: str):
|
|
43 |
return text, result
|
44 |
|
45 |
def preprocess_and_forward(text: str):
|
46 |
-
"""Process text and forward to translation service"""
|
47 |
original_text, preprocessing_result = preprocess_text(text)
|
48 |
|
49 |
# Forward original text to translation service
|
|
|
3 |
import spacy
|
4 |
from textblob import TextBlob
|
5 |
from gradio_client import Client
|
6 |
+
import re
|
7 |
|
8 |
# Initialize models
|
9 |
nlp = spacy.load("en_core_web_sm")
|
10 |
spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
11 |
|
12 |
+
def preprocess_capitalization(text: str) -> str:
    """Normalize irregular capitalization word by word.

    A word is any run of non-whitespace characters. Words that do not mix
    ASCII uppercase and lowercase letters (acronyms such as "NASA", plain
    lowercase words, numbers, punctuation) are returned unchanged. Words that
    do mix both cases (e.g. "HEllo") are rewritten so that their first letter
    is uppercase and every following character is lowercase.

    All whitespace between words (spaces, tabs, newlines) is preserved
    exactly as it appears in the input.

    Args:
        text: The raw input text.

    Returns:
        The text with mixed-case words normalized.
    """

    def _normalize(match: "re.Match[str]") -> str:
        word = match.group(0)
        has_upper = re.search(r"[A-Z]", word)
        has_lower = re.search(r"[a-z]", word)
        if not (has_upper and has_lower):
            # All-caps acronyms, all-lowercase words and letter-free tokens
            # are left untouched.
            return word
        # Capitalize the first *letter*, not the first character, so leading
        # punctuation such as quotes or brackets does not swallow the capital
        # (e.g. '"Hello' must not become '"hello').
        first = next(i for i, ch in enumerate(word) if ch.isalpha())
        return word[:first] + word[first].upper() + word[first + 1:].lower()

    # Substituting over \S+ keeps the original whitespace intact and treats
    # newline- or tab-separated words as separate words.
    return re.sub(r"\S+", _normalize, text)
|
28 |
+
|
29 |
def preprocess_text(text: str):
|
30 |
+
"""Process text and return corrections with position information."""
|
31 |
result = {
|
32 |
"spell_suggestions": [],
|
33 |
"entities": [],
|
34 |
"tags": []
|
35 |
}
|
36 |
|
37 |
+
# Apply capitalization preprocessing
|
38 |
+
capitalized_text = preprocess_capitalization(text)
|
39 |
+
if capitalized_text != text:
|
40 |
+
result["spell_suggestions"].append({
|
41 |
+
"original": text,
|
42 |
+
"corrected": capitalized_text
|
43 |
+
})
|
44 |
+
text = capitalized_text # Update text for further processing
|
45 |
+
|
46 |
# Find and record positions of corrections
|
47 |
doc = nlp(text)
|
48 |
|
|
|
70 |
return text, result
|
71 |
|
72 |
def preprocess_and_forward(text: str):
|
73 |
+
"""Process text and forward to translation service."""
|
74 |
original_text, preprocessing_result = preprocess_text(text)
|
75 |
|
76 |
# Forward original text to translation service
|