"""Gradio app offering a text classifier score and a paraphrase/correct pipeline.

The module loads its NLP resources at import time (NLTK WordNet data, the
spaCy ``en_core_web_sm`` pipeline, GloVe vectors, a DistilBERT classifier,
a spell checker and a grammar checker) and then builds and launches a
two-button Gradio interface.
"""
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from autocorrect import Speller  # Autocorrect library for spelling correction
from gensim import downloader as api
from gingerit.gingerit import GingerIt  # GingerIt for grammar correction
from nltk.corpus import wordnet
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Download with the interpreter that is running this script so the model
    # lands in the same environment, and fail loudly if the download fails.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load a smaller Word2Vec model from Gensim's pre-trained models
# NOTE(review): nothing in this file reads `word_vectors`; confirm whether the
# load is still needed before removing it.
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
# NOTE(review): the checkpoint name says it is fine-tuned on SST-2 (a sentiment
# task), so its class-1 score is presumably a sentiment score rather than an
# AI-authorship score -- confirm and swap in a real detector if so.
_AI_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer_ai = AutoTokenizer.from_pretrained(_AI_MODEL_NAME)
model_ai = AutoModelForSequenceClassification.from_pretrained(_AI_MODEL_NAME).to(device)

# Initialize Autocorrect for spelling correction
spell = Speller()

# Initialize GingerIt for grammar correction
parser = GingerIt()


def _capitalize_first(word: str) -> str:
    """Upper-case only the first character of *word*, leaving the rest intact.

    Unlike ``str.capitalize`` this does not lower-case the remainder, so
    acronyms and mixed-case names ("NASA", "iPhone") survive unchanged.
    """
    return word[:1].upper() + word[1:]


# AI detection function using DistilBERT
def detect_ai_generated(text: str) -> str:
    """Classify *text* and report the class-1 probability as a percentage.

    Args:
        text: Input text; it is truncated to 512 tokens by the tokenizer.

    Returns:
        A string of the form ``"AI-Generated Content Probability: NN.NN%"``.
    """
    inputs = tokenizer_ai(
        text, return_tensors="pt", truncation=True, max_length=512
    ).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # Index 1 of the first (only) row is reported as the "AI" probability.
    ai_probability = probabilities[0][1].item()
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"


# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word: str, pos: str) -> list:
    """Return the lemma names of the first WordNet synset for *word*.

    Args:
        word: The word to look up.
        pos: A WordNet part-of-speech constant (e.g. ``wordnet.NOUN``).

    Returns:
        The lemma names of the first matching synset, or an empty list when
        WordNet has no synset for the word with that part of speech.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name() for lemma in synsets[0].lemmas()]


# Function to check and correct tenses and verbs using spaCy
def check_tense_and_correct(text: str) -> str:
    """Replace third-person-singular present verbs (tag VBZ) with their lemma.

    All other tokens, including verbs with any other tag, are kept as-is.
    Each token keeps its own trailing whitespace, so punctuation and
    contractions are not split apart in the result.

    NOTE(review): rewriting VBZ to the base form ("goes" -> "go") is kept from
    the original logic; confirm it is the intended "correction".
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        if token.pos_ == 'VERB' and token.tag_ == 'VBZ':
            word = token.lemma_  # e.g. goes -> go
        else:
            word = token.text
        pieces.append(word + token.whitespace_)
    return ''.join(pieces)


# Function to capitalize the first letter of sentences and proper nouns
def capitalize_sentences_and_nouns(text: str) -> str:
    """Capitalize the first token of every sentence and every proper noun.

    Only the first character of a token is upper-cased; the rest of the token
    and the original spacing between tokens are preserved.
    """
    doc = nlp(text)
    pieces = []
    for sent in doc.sents:
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                word = _capitalize_first(token.text)
            else:
                word = token.text
            pieces.append(word + token.whitespace_)
    return ''.join(pieces)


# Paraphrasing function using spaCy and NLTK
def paraphrase_with_spacy_nltk(text: str) -> str:
    """Swap nouns, verbs, adjectives and adverbs for a WordNet synonym.

    A token is replaced by the first lemma of its first synset when that
    lemma differs from the lower-cased token; otherwise it is kept. The
    result is then passed through ``capitalize_sentences_and_nouns``.
    """
    # Map spaCy POS tags to WordNet POS tags. Built here rather than at module
    # level so the WordNet corpus is only touched when the function runs.
    wordnet_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    pieces = []
    for token in doc:
        pos = wordnet_pos.get(token.pos_)
        lowered = token.text.lower()
        synonyms = get_synonyms_nltk(lowered, pos) if pos else []

        # Replace with a synonym only if it actually differs from the token.
        if synonyms and synonyms[0] != lowered:
            # WordNet joins multi-word lemmas with underscores ("look_up").
            word = synonyms[0].replace('_', ' ')
        else:
            word = token.text
        pieces.append(word + token.whitespace_)

    # Join the pieces back into text, then capitalize sentences/proper nouns
    return capitalize_sentences_and_nouns(''.join(pieces))


# Function to correct spelling using autocorrect and grammar using GingerIt
def correct_spelling_and_grammar(text: str) -> str:
    """Run *text* through the spell checker and then the grammar checker.

    Blank input is returned unchanged without invoking either corrector.
    """
    if not text.strip():
        return text

    # Step 1: Correct spelling using autocorrect
    corrected_spelling = spell(text)

    # Step 2: Correct grammar using GingerIt
    grammar_correction = parser.parse(corrected_spelling)
    return grammar_correction['result']


# Combined function: Paraphrase -> Tense Check -> Capitalization -> Spelling and Grammar Correction
def paraphrase_and_correct(text: str) -> str:
    """Paraphrase *text* and then apply tense, capitalization and grammar fixes."""
    # Step 1: Paraphrase the text
    paraphrased_text = paraphrase_with_spacy_nltk(text)

    # Step 2: Check tense and verbs, and attempt correction
    tense_checked_text = check_tense_and_correct(paraphrased_text)

    # Step 3: Capitalize sentences and proper nouns (the tense step may have
    # introduced a lower-case lemma at the start of a sentence)
    capitalized_text = capitalize_sentences_and_nouns(tense_checked_text)

    # Step 4: Correct spelling and grammar
    return correct_spelling_and_grammar(capitalized_text)


# Gradio interface definition
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

# Launch the Gradio app
interface.launch(debug=False)