Frenchizer committed on
Commit
4524238
·
verified ·
1 Parent(s): a11ae53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -3,19 +3,46 @@ from transformers import pipeline
3
  import spacy
4
  from textblob import TextBlob
5
  from gradio_client import Client
 
6
 
7
  # Initialize models
8
  nlp = spacy.load("en_core_web_sm")
9
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def preprocess_text(text: str):
12
- """Process text and return corrections with position information"""
13
  result = {
14
  "spell_suggestions": [],
15
  "entities": [],
16
  "tags": []
17
  }
18
 
 
 
 
 
 
 
 
 
 
19
  # Find and record positions of corrections
20
  doc = nlp(text)
21
 
@@ -43,7 +70,7 @@ def preprocess_text(text: str):
43
  return text, result
44
 
45
  def preprocess_and_forward(text: str):
46
- """Process text and forward to translation service"""
47
  original_text, preprocessing_result = preprocess_text(text)
48
 
49
  # Forward original text to translation service
 
3
  import spacy
4
  from textblob import TextBlob
5
  from gradio_client import Client
6
+ import re
7
 
8
  # Initialize models
9
  nlp = spacy.load("en_core_web_sm")
10
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
11
 
12
def preprocess_capitalization(text: str) -> str:
    """Normalize irregular capitalization in each space-separated word.

    A word containing both uppercase and lowercase ASCII letters is rewritten
    so that its first letter is uppercase and everything after that letter is
    lowercase (e.g. "HEllo" -> "Hello"). Every other word is returned
    unchanged: acronyms made only of uppercase letters (e.g. "NASA"),
    all-lowercase words, and tokens without letters.

    Leading non-letter characters such as quotes or brackets are preserved
    and the first *letter* is the one capitalized, so "(Hello" stays "(Hello"
    rather than being lowercased to "(hello".

    Args:
        text: Input text. It is split on single spaces so that the original
            spacing is reproduced when the words are joined back together.

    Returns:
        The text with mixed-case words normalized.
    """

    def _normalize(word: str) -> str:
        # Only words mixing ASCII upper- and lowercase need fixing; acronyms,
        # lowercase words and letter-free tokens pass through untouched.
        if not (re.search(r"[A-Z]", word) and re.search(r"[a-z]", word)):
            return word
        # Skip leading punctuation so the first actual letter is capitalized.
        first = next(i for i, ch in enumerate(word) if ch.isalpha())
        return word[:first] + word[first].upper() + word[first + 1:].lower()

    return " ".join(_normalize(word) for word in text.split(" "))
28
+
29
  def preprocess_text(text: str):
30
+ """Process text and return corrections with position information."""
31
  result = {
32
  "spell_suggestions": [],
33
  "entities": [],
34
  "tags": []
35
  }
36
 
37
+ # Apply capitalization preprocessing
38
+ capitalized_text = preprocess_capitalization(text)
39
+ if capitalized_text != text:
40
+ result["spell_suggestions"].append({
41
+ "original": text,
42
+ "corrected": capitalized_text
43
+ })
44
+ text = capitalized_text # Update text for further processing
45
+
46
  # Find and record positions of corrections
47
  doc = nlp(text)
48
 
 
70
  return text, result
71
 
72
  def preprocess_and_forward(text: str):
73
+ """Process text and forward to translation service."""
74
  original_text, preprocessing_result = preprocess_text(text)
75
 
76
  # Forward original text to translation service