huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 2, 2024

Commit

73ae45e

verified ·

1 Parent(s): a5827d8

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -40

app.py CHANGED Viewed

@@ -6,20 +6,11 @@ import subprocess
 import nltk
 from nltk.corpus import wordnet
 from gensim import downloader as api
-import language_tool_python
-# Install Java
-def install_java():
-    subprocess.run(["apt-get", "update"])
-    subprocess.run(["apt-get", "install", "-y", "openjdk-11-jre"])
-install_java()
 # Ensure necessary NLTK data is downloaded
-nltk.data.path.append('/usr/local/share/nltk_data/')  # Ensure the correct path for NLTK data
 nltk.download('wordnet')
 nltk.download('omw-1.4')
-nltk.download('punkt')  # Download the Punkt tokenizer for sentence tokenization
 # Ensure the spaCy model is installed
 try:
@@ -38,13 +29,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-# Function to correct grammar using LanguageTool
-def correct_grammar_with_languagetool(text):
-    tool = language_tool_python.LanguageTool('en-US')
-    matches = tool.check(text)
-    corrected_text = language_tool_python.utils.correct(text, matches)
-    return corrected_text
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -62,12 +46,13 @@ def get_synonyms_nltk(word, pos):
         return [lemma.name() for lemma in lemmas]
     return []
-# Paraphrasing function using spaCy and NLTK
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
         pos = None
         if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
@@ -80,38 +65,33 @@ def paraphrase_with_spacy_nltk(text):
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
     paraphrased_sentence = ' '.join(paraphrased_words)
     return paraphrased_sentence
-# Sentence structuring using NLTK
-def structure_sentences(text):
-    sentences = nltk.sent_tokenize(text)  # Tokenize text into sentences
-    structured_sentences = []
-    for sentence in sentences:
-        # Here you can apply any structuring rules or logic you need.
-        structured_sentences.append(sentence)
-    structured_text = ' '.join(structured_sentences)
-    return structured_text
-# Combined function: Paraphrase -> Structure -> Grammar Check
-def humanize_text(text):
-    # Step 1: Paraphrase
     paraphrased_text = paraphrase_with_spacy_nltk(text)
-    # Step 2: Structure sentences
-    structured_text = structure_sentences(paraphrased_text)
-    # Step 3: Apply grammar correction
-    final_text = correct_grammar_with_languagetool(structured_text)
-    return final_text
 # Gradio interface definition
 with gr.Blocks() as interface:
@@ -119,12 +99,12 @@ with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
-            humanize_button = gr.Button("Humanize Text")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
-    humanize_button.click(humanize_text, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)

 import nltk
 from nltk.corpus import wordnet
 from gensim import downloader as api
+import language_tool_python  # Import the grammar checking library
 # Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 # Ensure the spaCy model is installed
 try:
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
         return [lemma.name() for lemma in lemmas]
     return []
+# Paraphrasing function using spaCy and NLTK (without grammar correction)
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
+        # Map spaCy POS tags to WordNet POS tags
         pos = None
         if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
+        # Replace with a synonym only if it makes sense
         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
+    # Join the words back into a sentence
     paraphrased_sentence = ' '.join(paraphrased_words)
     return paraphrased_sentence
+# Grammar correction function using LanguageTool
+def correct_grammar(text):
+    tool = language_tool_python.LanguageTool('en-US')
+    matches = tool.check(text)
+    corrected_text = language_tool_python.utils.correct(text, matches)
+    return corrected_text
+# Combined function: Paraphrase -> Grammar Check
+def paraphrase_and_correct(text):
+    # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
+    # Step 2: Apply grammar correction
+    corrected_text = correct_grammar(paraphrased_text)
+    return corrected_text
 # Gradio interface definition
 with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
+            paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
+    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)