File size: 4,059 Bytes
a4b85c4
 
 
 
 
 
 
b5d0fef
9fc880b
a4b85c4
 
 
 
b5d0fef
a4b85c4
 
b5d0fef
a4b85c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d0fef
a4b85c4
 
 
 
 
 
b5d0fef
a4b85c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d0fef
a4b85c4
 
 
 
 
b5d0fef
a4b85c4
b5d0fef
a4b85c4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import random
import re
import string
import spacy
from nltk.corpus import wordnet
import nltk
import gradio as gr

# Ensure that necessary NLTK resources are downloaded.
# These calls run at import time, every time the module is loaded.
# NOTE(review): 'punkt' is not referenced by any code visible in this file;
# 'averaged_perceptron_tagger' backs nltk.pos_tag and 'wordnet' backs
# wordnet.synsets, both of which are used below.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Load SpaCy model (shared module-level pipeline used for tokenizing,
# sentence splitting and tagging in the functions below)
nlp = spacy.load("en_core_web_sm")

# Exclude tags and words (adjusted for better precision)
# Tokens whose fine-grained tag (spaCy `token.tag_`) is in exclude_tags are
# never sent for synonym replacement.
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
# Tokens whose lower-cased text is in exclude_words are likewise left untouched.
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

def get_synonyms(word):
    """Return the set of WordNet lemma names usable as substitutes for `word`.

    Every synset returned by ``wordnet.synsets(word)`` is scanned. A lemma
    name is kept only if it contains no underscore, is purely alphabetic,
    and differs from `word` when compared case-insensitively. No sense
    disambiguation is performed: all synsets contribute equally.
    """
    target = word.lower()
    return {
        name
        for synset in wordnet.synsets(word)
        for name in (lemma.name() for lemma in synset.lemmas())
        if "_" not in name and name.isalpha() and name.lower() != target
    }

def replace_with_synonyms(word, pos_tag):
    """Pick a random synonym of `word` that NLTK tags as `pos_tag`.

    Each candidate from ``get_synonyms`` is tagged on its own (as a
    one-word list) with ``nltk.pos_tag``; only candidates whose tag equals
    `pos_tag` are eligible. If none is eligible, `word` is returned
    unchanged.
    """
    matching = []
    for candidate in get_synonyms(word):
        _, candidate_tag = nltk.pos_tag([candidate])[0]
        if candidate_tag == pos_tag:
            matching.append(candidate)
    return random.choice(matching) if matching else word

def improve_paraphrasing_and_grammar(text):
    """Paraphrase `text` by synonym substitution, then tidy the result.

    The text is parsed with the module-level spaCy pipeline. Within each
    sentence, every token that is not excluded (by tag via `exclude_tags`,
    by lower-cased text via `exclude_words`, or by being a single
    punctuation character) is passed to ``replace_with_synonyms``; all
    other tokens are kept verbatim. Tokens are joined with single spaces
    and the result is post-processed by the article, possessive,
    punctuation-spacing and capitalization helpers.

    Args:
        text: The input string to rewrite.

    Returns:
        The rewritten string.
    """
    doc = nlp(text)
    rewritten_sentences = []

    for sent in doc.sents:
        words = []
        for token in sent:
            replaceable = (
                token.tag_ not in exclude_tags
                and token.text.lower() not in exclude_words
                and token.text not in string.punctuation
            )
            if replaceable:
                # Fall back to the original text if no replacement comes back.
                words.append(replace_with_synonyms(token.text, token.tag_) or token.text)
            else:
                words.append(token.text)
        rewritten_sentences.append(' '.join(words))

    final_text = ' '.join(rewritten_sentences)

    # Repair articles BEFORE the spacing and capitalization passes: any token
    # spacing the article pass leaves behind is then cleaned up afterwards,
    # and a sentence-initial "a"/"an" is corrected while still lower-case.
    final_text = fix_article_errors(final_text)
    final_text = fix_possessives(final_text)
    final_text = fix_punctuation_spacing(final_text)
    final_text = capitalize_sentences(final_text)

    return final_text

def fix_punctuation_spacing(text):
    """Remove whitespace that precedes a punctuation mark.

    Handles ``, . ! ? ; :`` so that text rebuilt by joining tokens with
    spaces ("Hello , world ; bye !") reads naturally
    ("Hello, world; bye!").

    Args:
        text: The string to clean up.

    Returns:
        `text` with any run of whitespace directly before one of the
        handled punctuation marks removed.
    """
    return re.sub(r'\s+([,.!?;:])', r'\1', text)

def fix_possessives(text):
    """Re-attach a detached possessive suffix, e.g. "John ' s" -> "John's".

    At most one whitespace character is allowed on either side of the
    apostrophe. The trailing ``\\b`` ensures the ``s`` is a standalone
    suffix, so an apostrophe followed by a word that merely starts with
    "s" ("dogs' shoes", "said 'so'") is left alone.

    Args:
        text: The string to clean up.

    Returns:
        `text` with detached possessive suffixes joined to their word.
    """
    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)

def capitalize_sentences(text):
    """Upper-case the first character of each sentence.

    Sentences are split at whitespace that follows a word character plus
    one of ``. ! ?``. Each sentence keeps its own terminator, so the
    pieces are re-joined with a single space (joining with ". " would
    double the punctuation). Only the first character is changed; the
    rest of the sentence keeps its casing, so proper nouns survive.

    Args:
        text: The string to capitalize.

    Returns:
        The sentences joined by single spaces, each starting with an
        upper-case character where the first character has one.
    """
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    # s[:1] instead of s[0] so an empty piece (e.g. empty input) is safe.
    return ' '.join(s[:1].upper() + s[1:] for s in sentences)

def fix_article_errors(text):
    """Correct lower-case "a"/"an" based on the next word's first letter.

    A standalone "a" followed by a word starting with a vowel letter
    becomes "an"; a standalone "an" followed by a word starting with any
    other letter becomes "a". The check is purely orthographic (first
    letter in "aeiou"), not phonetic, so cases such as "an hour" or
    "a university" are not handled specially.

    The substitution is done in place with a regular expression, so all
    other characters, including spacing and punctuation, are preserved
    exactly. An article that is the last word of the text, or that is
    followed by something other than whitespace plus a letter, is left
    unchanged rather than raising.

    Args:
        text: The string to correct.

    Returns:
        `text` with mismatched lower-case articles swapped.
    """
    def _swap(match):
        article = match.group(1)
        next_starts_with_vowel = match.group(2).lower() in "aeiou"
        if article == "a" and next_starts_with_vowel:
            return "an"
        if article == "an" and not next_starts_with_vowel:
            return "a"
        return article

    # The lookahead captures the next word's first letter without consuming
    # it, so only the article itself is replaced.
    return re.sub(r"\b(an?)\b(?=\s+([A-Za-z]))", _swap, text)

# Gradio app setup
def gradio_interface(text):
    """Gradio callback: run the submitted text through the rewrite pipeline.

    Delegates to ``improve_paraphrasing_and_grammar`` and returns its
    result unchanged.
    """
    processed = improve_paraphrasing_and_grammar(text)
    return processed

# Build the UI: an input box, a read-only output box, and a button that runs
# gradio_interface on the input and writes the result to the output.
with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")
    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
    # The label's leading emoji (U+1F504) was stored as mis-decoded bytes;
    # an escape sequence keeps it intact regardless of file encoding.
    submit_button = gr.Button("\U0001F504 Paraphrase and Correct")

    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)

# Launch the Gradio app
demo.launch(share=True)