import os
import subprocess
import gradio as gr
from transformers import pipeline
import spacy
import nltk
from nltk.corpus import wordnet
# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')
# Ensure the spaCy model is installed, downloading it on first run if needed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
# Initialize the English text classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
def predict_en(text):
    """Function to predict the label and score for English text (AI Detection)"""
    res = pipeline_en(text)[0]
    return res['label'], res['score']
def get_synonyms_nltk(word, pos):
    """Function to get synonyms using NLTK WordNet"""
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        return [lemma.name() for lemma in lemmas]
    return []
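# Example (exact lemmas depend on the installed WordNet data):
#   get_synonyms_nltk("dog", wordnet.NOUN)
#   # -> roughly ['dog', 'domestic_dog', 'Canis_familiaris']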
def rephrase_text(text):
    """Function to rephrase text by replacing words with synonyms"""
    # Map spaCy coarse POS tags to WordNet POS constants; wordnet.synsets
    # expects 'n'/'v'/'a', not spaCy's 'NOUN'/'VERB'/'ADJ'.
    pos_map = {"NOUN": wordnet.NOUN, "VERB": wordnet.VERB, "ADJ": wordnet.ADJ}
    doc = nlp(text)
    rephrased_text = []
    for token in doc:
        if token.pos_ in pos_map:
            synonyms = get_synonyms_nltk(token.text, pos=pos_map[token.pos_])
            if synonyms:
                # Replace with the first synonym found; WordNet joins
                # multiword lemmas with underscores, so restore spaces.
                rephrased_text.append(synonyms[0].replace('_', ' '))
            else:
                rephrased_text.append(token.text)
        else:
            rephrased_text.append(token.text)
    return ' '.join(rephrased_text)
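# Note: output depends on WordNet's synset ordering. The first lemma of a
# word's first synset is often the word itself, so many tokens pass through
# unchanged, e.g. rephrase_text("the quick dog runs") may come back nearly verbatim.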
def capitalize_sentences_and_nouns(text):
    """Function to capitalize the first letter of sentences and proper nouns"""
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start:  # First word of the sentence
                sentence.append(token.text.capitalize())
            elif token.pos_ == "PROPN":  # Proper noun
                sentence.append(token.text.capitalize())
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)
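# Example (assuming the tagger labels the names as PROPN):
#   capitalize_sentences_and_nouns("my friend alice lives in paris.")
#   # -> "My friend Alice lives in Paris ."
# Joining tokens with spaces detaches punctuation; a detokenizer would be
# needed to restore the original spacing.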
def correct_tense_errors(text):
    """Function to correct tense errors by normalizing auxiliary verbs to their base form"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # spaCy tags auxiliaries as AUX (older models used VERB), so check both.
        if token.pos_ in {"VERB", "AUX"} and token.dep_ in {"aux", "auxpass"}:
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
def correct_singular_plural_errors(text):
    """Function to correct singular/plural errors"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.pos_ == "NOUN":
            # Quantifiers and determiners attach to the noun itself in the
            # dependency tree, so inspect the noun's own children.
            modifiers = {child.text.lower() for child in token.children}
            if token.tag_ == "NN" and modifiers & {'many', 'several', 'few'}:
                # Singular noun with a plural quantifier: pluralize
                corrected_text.append(token.lemma_ + 's')
            elif token.tag_ == "NNS" and modifiers & {'a', 'one'}:
                # Plural noun with a singular determiner: use the lemma
                corrected_text.append(token.lemma_)
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
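# Example (assuming the parser attaches the quantifier to the noun):
#   correct_singular_plural_errors("many dog bark")  # -> "many dogs bark"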
def correct_article_errors(text):
    """Function to check and correct article errors"""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # Guard against the final token, which has no right neighbor.
        if token.lower_ in ("a", "an") and token.i + 1 < len(doc):
            next_token = token.nbor(1)
            starts_with_vowel = next_token.text[0].lower() in "aeiou"
            corrected = "an" if starts_with_vowel else "a"
            # Preserve capitalization for sentence-initial articles
            if token.text[0].isupper():
                corrected = corrected.capitalize()
            corrected_text.append(corrected)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
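# Example:
#   correct_article_errors("she ate a apple and an pear")
#   # -> "she ate an apple and a pear"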
def paraphrase_and_correct(text):
    """Function to rephrase the text and then apply grammar corrections"""
    rephrased_text = rephrase_text(text)
    # Capitalize first so sentence starts and proper nouns are restored
    # before the rule-based correction passes run.
    rephrased_text = capitalize_sentences_and_nouns(rephrased_text)
    rephrased_text = correct_article_errors(rephrased_text)
    rephrased_text = correct_tense_errors(rephrased_text)
    rephrased_text = correct_singular_plural_errors(rephrased_text)
    return rephrased_text
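# End-to-end sketch (results vary with the models and WordNet data):
#   paraphrase_and_correct("a apple is better than an pear")
#   # would plausibly yield "An apple is better than a pear" via the
#   # capitalization and article passes.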
# Define Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        t1 = gr.Textbox(label="Input Text", lines=5)
        button1 = gr.Button("Process")
    with gr.Row():
        output_text = gr.Textbox(label="Processed Text", lines=5)
        label1 = gr.Label(label="AI Detection Label")
        score1 = gr.Label(label="AI Detection Score")
    button1.click(
        fn=lambda text: (paraphrase_and_correct(text), *predict_en(text)),
        inputs=[t1],
        outputs=[output_text, label1, score1]
    )
demo.launch()