space_19

Sleeping

App Files Files Community

Frenchizer committed 13 days ago

Commit

4524238

verified ·

1 Parent(s): a11ae53

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -2

app.py CHANGED Viewed

@@ -3,19 +3,46 @@ from transformers import pipeline
 import spacy
 from textblob import TextBlob
 from gradio_client import Client
 # Initialize models
 nlp = spacy.load("en_core_web_sm")
 spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
 def preprocess_text(text: str):
-    """Process text and return corrections with position information"""
     result = {
         "spell_suggestions": [],
         "entities": [],
         "tags": []
     }
     # Find and record positions of corrections
     doc = nlp(text)
@@ -43,7 +70,7 @@ def preprocess_text(text: str):
     return text, result
 def preprocess_and_forward(text: str):
-    """Process text and forward to translation service"""
     original_text, preprocessing_result = preprocess_text(text)
     # Forward original text to translation service

 import spacy
 from textblob import TextBlob
 from gradio_client import Client
+import re
 # Initialize models
 nlp = spacy.load("en_core_web_sm")
 spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
def preprocess_capitalization(text: str) -> str:
    """Normalize irregular capitalization, one whitespace-delimited word at a time.

    A word that mixes ASCII upper- and lower-case letters (e.g. "HEllo") is
    rewritten with its first alphanumeric character upper-cased and everything
    after it lower-cased. Words that are entirely upper-case (acronyms) or
    entirely lower-case are returned untouched.

    All whitespace (spaces, tabs, newlines, repeated spaces) is preserved
    exactly as it appears in the input.

    Args:
        text: The raw input text.

    Returns:
        The text with mixed-case words normalized.
    """
    def _normalize(match):
        word = match.group(0)
        # Only words mixing both cases need fixing; this also leaves
        # all-caps acronyms and plain lower-case words unchanged.
        if not (re.search(r"[A-Z]", word) and re.search(r"[a-z]", word)):
            return word
        # Skip leading punctuation such as quotes or brackets so that the
        # capital lands on the word itself: '(hEllo)' -> '(Hello)'.
        start = next((i for i, ch in enumerate(word) if ch.isalnum()), 0)
        return word[:start] + word[start].upper() + word[start + 1:].lower()

    # Substituting over runs of non-whitespace keeps tabs and newlines as
    # separators instead of gluing neighbouring words into a single token.
    return re.sub(r"\S+", _normalize, text)
 def preprocess_text(text: str):
+    """Process text and return corrections with position information."""
     result = {
         "spell_suggestions": [],
         "entities": [],
         "tags": []
     }
+    # Apply capitalization preprocessing
+    capitalized_text = preprocess_capitalization(text)
+    if capitalized_text != text:
+        result["spell_suggestions"].append({
+            "original": text,
+            "corrected": capitalized_text
+        })
+        text = capitalized_text  # Update text for further processing
     # Find and record positions of corrections
     doc = nlp(text)
     return text, result
 def preprocess_and_forward(text: str):
+    """Process text and forward to translation service."""
     original_text, preprocessing_result = preprocess_text(text)
     # Forward original text to translation service