sashtech committed on
Commit
a3485f7
·
verified ·
1 Parent(s): 7a350d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -23
app.py CHANGED
@@ -6,11 +6,19 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
9
- from textblob import TextBlob # Import TextBlob for grammar correction
 
 
 
 
 
 
 
10
 
11
  # Ensure necessary NLTK data is downloaded
12
  nltk.download('wordnet')
13
  nltk.download('omw-1.4')
 
14
 
15
  # Ensure the spaCy model is installed
16
  try:
@@ -29,21 +37,13 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
30
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
31
 
32
- # Function to correct grammar using TextBlob
33
def correct_grammar_with_textblob(text):
    """Return *text* after running it through TextBlob's ``correct()``.

    Args:
        text: The input string to correct.

    Returns:
        The corrected text as a plain ``str``.
    """
    # TextBlob.correct() yields a TextBlob; callers expect a plain string.
    return str(TextBlob(text).correct())
37
 
38
- # AI detection function using DistilBERT
39
def detect_ai_generated(text):
    """Score *text* with the loaded sequence classifier and format the result.

    The text is tokenized (truncated to 512 tokens), run through the
    module-level ``model`` without gradient tracking, and the softmax
    probability of class index 1 is reported.

    Args:
        text: The input string to score.

    Returns:
        A string of the form ``"AI-Generated Content Probability: 93.00%"``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # NOTE(review): the checkpoint loaded for `model` is named
    # "distilbert-base-uncased-finetuned-sst-2-english", so index 1 is
    # presumably a sentiment class rather than "AI-generated" -- confirm.
    ai_probability = probabilities[0][1].item()
    # softmax yields a value in [0, 1]; the ".2%" format scales it by 100 and
    # appends the percent sign (previously 0.93 was rendered as "0.93%").
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
46
-
47
  # Function to get synonyms using NLTK WordNet
48
  def get_synonyms_nltk(word, pos):
49
  synsets = wordnet.synsets(word, pos=pos)
@@ -52,13 +52,12 @@ def get_synonyms_nltk(word, pos):
52
  return [lemma.name() for lemma in lemmas]
53
  return []
54
 
55
- # Paraphrasing function using spaCy and NLTK with TextBlob grammar correction
56
  def paraphrase_with_spacy_nltk(text):
57
  doc = nlp(text)
58
  paraphrased_words = []
59
 
60
  for token in doc:
61
- # Map spaCy POS tags to WordNet POS tags
62
  pos = None
63
  if token.pos_ in {"NOUN"}:
64
  pos = wordnet.NOUN
@@ -71,19 +70,38 @@ def paraphrase_with_spacy_nltk(text):
71
 
72
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
73
 
74
- # Replace with a synonym only if it makes sense
75
  if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
76
  paraphrased_words.append(synonyms[0])
77
  else:
78
  paraphrased_words.append(token.text)
79
 
80
- # Join the words back into a sentence
81
  paraphrased_sentence = ' '.join(paraphrased_words)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- # Correct the grammar of the paraphrased sentence using TextBlob
84
- corrected_sentence = correct_grammar_with_textblob(paraphrased_sentence)
85
 
86
- return corrected_sentence
87
 
88
  # Gradio interface definition
89
  with gr.Blocks() as interface:
@@ -91,12 +109,12 @@ with gr.Blocks() as interface:
91
  with gr.Column():
92
  text_input = gr.Textbox(lines=5, label="Input Text")
93
  detect_button = gr.Button("AI Detection")
94
- paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK (Grammar Corrected with TextBlob)")
95
  with gr.Column():
96
  output_text = gr.Textbox(label="Output")
97
 
98
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
99
- paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
100
 
101
  # Launch the Gradio app
102
  interface.launch(debug=False)
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
9
+ import language_tool_python
10
+
11
+ # Install Java
12
def install_java():
    """Run ``apt-get update`` and then install the ``openjdk-11-jre`` package.

    Exit codes of the two commands are not inspected, so a failed install
    does not raise here.
    """
    apt_commands = (
        ["apt-get", "update"],
        ["apt-get", "install", "-y", "openjdk-11-jre"],
    )
    for command in apt_commands:
        subprocess.run(command)
15
+
16
+ install_java()
17
 
18
  # Ensure necessary NLTK data is downloaded
19
  nltk.download('wordnet')
20
  nltk.download('omw-1.4')
21
+ nltk.download('punkt') # Download the Punkt tokenizer for sentence tokenization
22
 
23
  # Ensure the spaCy model is installed
24
  try:
 
37
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
38
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
39
 
40
+ # Function to correct grammar using LanguageTool
41
def correct_grammar_with_languagetool(text):
    """Return *text* with LanguageTool's suggested en-US corrections applied.

    A single ``LanguageTool`` instance is created on first use and reused on
    later calls, instead of constructing a new one for every request.

    Args:
        text: The input string to check.

    Returns:
        The corrected text. Empty input is returned unchanged without
        starting LanguageTool.
    """
    if not text:
        return text

    # Cache the checker on the function object so repeated button clicks do
    # not each construct (and leave behind) a fresh LanguageTool instance.
    tool = getattr(correct_grammar_with_languagetool, "_tool", None)
    if tool is None:
        tool = language_tool_python.LanguageTool('en-US')
        correct_grammar_with_languagetool._tool = tool

    matches = tool.check(text)
    return language_tool_python.utils.correct(text, matches)
46
 
 
 
 
 
 
 
 
 
 
47
  # Function to get synonyms using NLTK WordNet
48
  def get_synonyms_nltk(word, pos):
49
  synsets = wordnet.synsets(word, pos=pos)
 
52
  return [lemma.name() for lemma in lemmas]
53
  return []
54
 
55
+ # Paraphrasing function using spaCy and NLTK
56
  def paraphrase_with_spacy_nltk(text):
57
  doc = nlp(text)
58
  paraphrased_words = []
59
 
60
  for token in doc:
 
61
  pos = None
62
  if token.pos_ in {"NOUN"}:
63
  pos = wordnet.NOUN
 
70
 
71
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
72
 
 
73
  if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
74
  paraphrased_words.append(synonyms[0])
75
  else:
76
  paraphrased_words.append(token.text)
77
 
 
78
  paraphrased_sentence = ' '.join(paraphrased_words)
79
+ return paraphrased_sentence
80
+
81
+ # Sentence structuring using NLTK
82
def structure_sentences(text):
    """Split *text* into sentences with NLTK and re-join them with spaces.

    Args:
        text: The input string to segment.

    Returns:
        The sentences found by ``nltk.sent_tokenize`` joined by single spaces.
    """
    # Each sentence is currently passed through unchanged; per-sentence
    # structuring rules would be applied here.
    return ' '.join(nltk.sent_tokenize(text))
92
+
93
+ # Combined function: Paraphrase -> Structure -> Grammar Check
94
def humanize_text(text):
    """Run *text* through the paraphrase -> structure -> grammar pipeline.

    Args:
        text: The input string to process.

    Returns:
        The output of ``correct_grammar_with_languagetool`` applied to the
        paraphrased and sentence-structured text.
    """
    reworded = paraphrase_with_spacy_nltk(text)
    tidied = structure_sentences(reworded)
    return correct_grammar_with_languagetool(tidied)
105
 
106
  # Gradio interface definition
107
  with gr.Blocks() as interface:
 
109
  with gr.Column():
110
  text_input = gr.Textbox(lines=5, label="Input Text")
111
  detect_button = gr.Button("AI Detection")
112
+ humanize_button = gr.Button("Humanize Text")
113
  with gr.Column():
114
  output_text = gr.Textbox(label="Output")
115
 
116
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
117
+ humanize_button.click(humanize_text, inputs=text_input, outputs=output_text)
118
 
119
  # Launch the Gradio app
120
  interface.launch(debug=False)