Spaces:

sashdev
/

LTP

Runtime error

App Files Files Community

sashdev committed on Sep 27, 2024

Commit

a4b85c4

verified ·

1 Parent(s): 625eebf

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -37

app.py CHANGED Viewed

@@ -1,46 +1,110 @@
-# Imports
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
-model = AutoModelForSeq2SeqLM.from_pretrained("prithivida/grammar_error_correcter_v1")
-# Use GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
-# Grammar correction function
-def correct_grammar(text):
-    # Tokenize input text with an increased max_length for handling larger input
-    inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
-    # Generate corrected text with increased max_length and num_beams
-    outputs = model.generate(**inputs, max_length=2024, num_beams=5, early_stopping=True)
-    # Decode the output and return the corrected text
-    corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return corrected_text
-# Gradio interface function
-def correct_grammar_interface(text):
-    corrected_text = correct_grammar(text)
-    return corrected_text
-# Gradio app interface
-with gr.Blocks() as grammar_app:
-    gr.Markdown("<h1>Grammar Correction App (up to 300 words)</h1>")
-    with gr.Row():
-        input_box = gr.Textbox(label="Input Text", placeholder="Enter text (up to 300 words)", lines=10)
-        output_box = gr.Textbox(label="Corrected Text", placeholder="Corrected text will appear here", lines=10)
-    submit_button = gr.Button("Correct Grammar")
-    # Bind the button click to the grammar correction function
-    submit_button.click(fn=correct_grammar_interface, inputs=input_box, outputs=output_box)
-# Launch the app
-if __name__ == "__main__":
-    grammar_app.launch()

+import os
+import random
+import re
+import string
+import spacy
+from nltk.corpus import wordnet
+import nltk
 import gradio as gr
+# Ensure that necessary NLTK resources are downloaded
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+# NLTK 3.9+ loads the English tagger used by nltk.pos_tag from the
+# '..._eng' package; fetch it as well so tagging works on either release line.
+nltk.download('averaged_perceptron_tagger_eng')
+nltk.download('wordnet')
+# Load SpaCy model
+nlp = spacy.load("en_core_web_sm")
+# Fine-grained POS tags (compared against spaCy's token.tag_) whose tokens are
+# never replaced by a synonym: pronouns, modals, inflected verbs, "to",
+# prepositions, determiners and conjunctions.
+exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
+# Lower-cased words that are always kept verbatim, whatever tag they receive.
+exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
+def get_synonyms(word):
+    """Return the set of WordNet lemma names usable as stand-ins for *word*.
+
+    Every synset WordNet reports for *word* is scanned. A lemma name is kept
+    only if it is a single purely alphabetic word (no underscore-joined
+    phrases) and differs from *word* when compared case-insensitively.
+    """
+    target = word.lower()
+    return {
+        name
+        for synset in wordnet.synsets(word)
+        for name in (lemma.name() for lemma in synset.lemmas())
+        if "_" not in name and name.isalpha() and name.lower() != target
+    }
+def replace_with_synonyms(word, pos_tag):
+    """Pick a random synonym of *word* that NLTK tags as *pos_tag*.
+
+    Each candidate from get_synonyms() is tagged on its own with
+    nltk.pos_tag; only candidates whose tag equals *pos_tag* are eligible.
+    Returns *word* unchanged when no candidate qualifies.
+    """
+    matching = []
+    for candidate in get_synonyms(word):
+        # Tag the candidate in isolation and compare its tag with the original's.
+        tagged = nltk.pos_tag([candidate])
+        if tagged[0][1] == pos_tag:
+            matching.append(candidate)
+    if not matching:
+        return word
+    return random.choice(matching)
+def improve_paraphrasing_and_grammar(text):
+    """Paraphrase *text* by synonym substitution, then tidy the result.
+
+    The text is parsed with spaCy. Within each sentence, every token whose
+    tag is not in ``exclude_tags``, whose lower-cased text is not in
+    ``exclude_words`` and which is not a punctuation character is passed to
+    replace_with_synonyms(); all other tokens are kept verbatim. Tokens are
+    re-joined with single spaces and the clean-up passes are applied.
+
+    Returns the rewritten text as a single string.
+    """
+    doc = nlp(text)
+    rewritten_sentences = []
+    for sent in doc.sents:
+        words = []
+        for token in sent:
+            replaceable = (
+                token.tag_ not in exclude_tags
+                and token.text.lower() not in exclude_words
+                and token.text not in string.punctuation
+            )
+            if replaceable:
+                # replace_with_synonyms falls back to the word itself.
+                words.append(replace_with_synonyms(token.text, token.tag_))
+            else:
+                words.append(token.text)
+        rewritten_sentences.append(' '.join(words))
+    final_text = ' '.join(rewritten_sentences)
+    # The article pass re-tokenizes the text, so it must run before the
+    # spacing passes; running it last re-introduced spaces in front of
+    # punctuation and split possessives apart again.
+    final_text = fix_article_errors(final_text)
+    final_text = fix_possessives(final_text)
+    final_text = fix_punctuation_spacing(final_text)
+    final_text = capitalize_sentences(final_text)
+    return final_text
+def fix_punctuation_spacing(text):
+    """Remove whitespace that precedes a punctuation mark.
+
+    Handles ``, . ! ? ; :`` so that space-joined tokens such as
+    ``"Hello , world !"`` become ``"Hello, world!"``. Returns the new string.
+    """
+    return re.sub(r'\s+([,.!?;:])', r'\1', text)
+def fix_possessives(text):
+    """Re-attach a detached possessive, e.g. ``"John ' s"`` -> ``"John's"``.
+
+    At most one space is allowed on either side of the apostrophe. The
+    trailing word boundary makes sure the ``s`` is a stand-alone possessive
+    marker and not the first letter of the next word, so ``"boys ' shoes"``
+    and quoted words such as ``"say 'so'"`` are left untouched.
+    """
+    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)
+def capitalize_sentences(text):
+    """Upper-case the first character of each sentence.
+
+    Sentences are split on whitespace that follows a word character plus one
+    of ``. ! ?``. The terminator stays attached to its sentence, so the
+    pieces are re-joined with a single space (joining with ``'. '`` would
+    double the punctuation). Only the first character is changed; the rest
+    of the sentence keeps its casing, so proper nouns and "I" survive.
+    """
+    sentences = re.split(r'(?<=\w[.!?])\s+', text)
+    return ' '.join(s[:1].upper() + s[1:] for s in sentences)
+def fix_article_errors(text):
+    """Swap 'a'/'an' according to the first letter of the following token.
+
+    This is a spelling heuristic, not a pronunciation check: 'an' is used
+    when the next token starts with one of ``a e i o u`` and 'a' otherwise.
+    Lower-case articles are always checked. Capitalized 'A'/'An' are checked
+    only when spaCy tags them as determiners, so a stand-alone letter "A" is
+    left alone. An article that is the last token has nothing to agree with
+    and is kept as is. The original spacing between tokens is preserved.
+
+    Returns the corrected text.
+    """
+    doc = nlp(text)
+    last_index = len(doc) - 1
+    pieces = []
+    for token in doc:
+        word = token.text
+        is_article = word in ('a', 'an') or (
+            word in ('A', 'An') and token.pos_ == 'DET'
+        )
+        # Guard the look-ahead: token.nbor(1) raises IndexError on the last token.
+        if is_article and token.i < last_index:
+            next_text = doc[token.i + 1].text
+            wanted = 'an' if next_text[0].lower() in 'aeiou' else 'a'
+            if wanted != word.lower():
+                word = wanted.capitalize() if word[0].isupper() else wanted
+        # Re-attach the token's own trailing whitespace instead of joining with
+        # ' ', which would insert spaces before punctuation and inside "'s".
+        pieces.append(word + token.whitespace_)
+    return ''.join(pieces)
+# Gradio app setup
+def gradio_interface(text):
+    """Callback for the submit button: run the full rewrite pipeline on *text*."""
+    rewritten = improve_paraphrasing_and_grammar(text)
+    return rewritten
+# Build the UI: an input box, a read-only output box and a button that
+# feeds the input through gradio_interface.
+with gr.Blocks() as demo:
+    gr.Markdown("## Text Paraphrasing and Grammar Correction")
+    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
+    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
+    submit_button = gr.Button("🔄 Paraphrase and Correct")
+    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)
+# Launch the Gradio app only when run as a script, so importing this module
+# (e.g. to reuse the helpers) does not start a server as a side effect.
+if __name__ == "__main__":
+    demo.launch(share=True)