Frenchizer committed on
Commit
11c10f2
·
verified ·
1 Parent(s): ee21c59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -44
app.py CHANGED
@@ -5,65 +5,61 @@ from textblob import TextBlob
5
  from gradio_client import Client
6
 
7
  # Initialize models
8
- nlp = spacy.load("en_core_web_sm") # NER model
9
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
10
 
11
  def preprocess_text(text: str):
12
- """
13
- Applies spell-checking and named entity recognition (NER) to preprocess text.
14
- Returns token-level suggestions.
15
- """
16
- tokens = text.split()
17
- suggestions = []
18
-
19
- for token in tokens:
20
- token_suggestions = {"original": token, "suggestions": []}
21
-
22
- # Basic spell checking
23
- corrected = str(TextBlob(token).correct())
24
- if corrected != token:
25
- token_suggestions["suggestions"].append(corrected)
26
-
27
- # Transformer-based spell checking
28
- spell_checked = spell_checker(token, max_length=20)[0]['generated_text']
29
- if spell_checked != token and spell_checked not in token_suggestions["suggestions"]:
30
- token_suggestions["suggestions"].append(spell_checked)
31
-
32
- suggestions.append(token_suggestions)
33
-
34
- # Named Entity Recognition (NER)
35
  doc = nlp(text)
36
- entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
37
 
38
- return {"tokens": suggestions, "entities": entities}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- def preprocess_and_forward(text: str):
41
- """
42
- Processes the input text, returns suggestions, and forwards the cleaned version for translation.
43
- """
44
- processed_data = preprocess_text(text)
45
- final_text = " ".join([t['suggestions'][0] if t['suggestions'] else t['original'] for t in processed_data["tokens"]])
46
- translation = forward_to_translation(final_text)
47
- return processed_data, translation # Unpacking dictionary values separately
48
 
49
- def forward_to_translation(text: str) -> str:
50
- """
51
- Sends preprocessed text for translation and returns only the translated text.
52
- """
 
 
 
53
  client = Client("Frenchizer/space_17")
54
  try:
55
- return client.predict(text)
 
56
  except Exception as e:
57
- return f"Error: {str(e)}"
58
 
59
  # Gradio interface
60
  with gr.Blocks() as demo:
61
  input_text = gr.Textbox(label="Input Text")
62
- output_text = gr.Textbox(label="Translated Text")
63
- suggestion_output = gr.JSON(label="Suggestions")
64
-
65
  preprocess_button = gr.Button("Process")
66
- preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[suggestion_output, output_text])
67
 
68
  if __name__ == "__main__":
69
  demo.launch()
 
5
  from gradio_client import Client
6
 
7
  # Initialize models
8
+ nlp = spacy.load("en_core_web_sm")
9
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
10
 
11
  def preprocess_text(text: str):
12
+ """Process text and return corrections with position information"""
13
+ result = {
14
+ "spell_suggestions": [],
15
+ "entities": [],
16
+ "tags": []
17
+ }
18
+
19
+ # Find and record positions of corrections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  doc = nlp(text)
 
21
 
22
+ # TextBlob spell check with position tracking
23
+ blob = TextBlob(text)
24
+ corrected = str(blob.correct())
25
+ if corrected != text:
26
+ result["spell_suggestions"].append({
27
+ "original": text,
28
+ "corrected": corrected
29
+ })
30
+
31
+ # Transformer spell check
32
+ spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
33
+ if spell_checked != text and spell_checked != corrected:
34
+ result["spell_suggestions"].append({
35
+ "original": text,
36
+ "corrected": spell_checked
37
+ })
38
 
39
+ # Add entities and tags
40
+ result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
41
+ result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
 
 
 
 
 
42
 
43
+ return text, result
44
+
45
+ def preprocess_and_forward(text: str):
46
+ """Process text and forward to translation service"""
47
+ original_text, preprocessing_result = preprocess_text(text)
48
+
49
+ # Forward original text to translation service
50
  client = Client("Frenchizer/space_17")
51
  try:
52
+ translation = client.predict(original_text)
53
+ return [translation, preprocessing_result]
54
  except Exception as e:
55
+ return [f"Error: {str(e)}", preprocessing_result]
56
 
57
  # Gradio interface
58
  with gr.Blocks() as demo:
59
  input_text = gr.Textbox(label="Input Text")
60
+ output_text = gr.Textbox(label="Output Text")
 
 
61
  preprocess_button = gr.Button("Process")
62
+ preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[output_text])
63
 
64
  if __name__ == "__main__":
65
  demo.launch()