sashtech committed on
Commit
73ae45e
·
verified ·
1 Parent(s): a5827d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -40
app.py CHANGED
@@ -6,20 +6,11 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
9
- import language_tool_python
10
-
11
- # Install Java
12
- def install_java():
13
- subprocess.run(["apt-get", "update"])
14
- subprocess.run(["apt-get", "install", "-y", "openjdk-11-jre"])
15
-
16
- install_java()
17
 
18
  # Ensure necessary NLTK data is downloaded
19
- nltk.data.path.append('/usr/local/share/nltk_data/') # Ensure the correct path for NLTK data
20
  nltk.download('wordnet')
21
  nltk.download('omw-1.4')
22
- nltk.download('punkt') # Download the Punkt tokenizer for sentence tokenization
23
 
24
  # Ensure the spaCy model is installed
25
  try:
@@ -38,13 +29,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
39
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
40
 
41
- # Function to correct grammar using LanguageTool
42
- def correct_grammar_with_languagetool(text):
43
- tool = language_tool_python.LanguageTool('en-US')
44
- matches = tool.check(text)
45
- corrected_text = language_tool_python.utils.correct(text, matches)
46
- return corrected_text
47
-
48
  # AI detection function using DistilBERT
49
  def detect_ai_generated(text):
50
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -62,12 +46,13 @@ def get_synonyms_nltk(word, pos):
62
  return [lemma.name() for lemma in lemmas]
63
  return []
64
 
65
- # Paraphrasing function using spaCy and NLTK
66
  def paraphrase_with_spacy_nltk(text):
67
  doc = nlp(text)
68
  paraphrased_words = []
69
 
70
  for token in doc:
 
71
  pos = None
72
  if token.pos_ in {"NOUN"}:
73
  pos = wordnet.NOUN
@@ -80,38 +65,33 @@ def paraphrase_with_spacy_nltk(text):
80
 
81
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
82
 
 
83
  if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
84
  paraphrased_words.append(synonyms[0])
85
  else:
86
  paraphrased_words.append(token.text)
87
 
 
88
  paraphrased_sentence = ' '.join(paraphrased_words)
 
89
  return paraphrased_sentence
90
 
91
- # Sentence structuring using NLTK
92
- def structure_sentences(text):
93
- sentences = nltk.sent_tokenize(text) # Tokenize text into sentences
94
- structured_sentences = []
95
-
96
- for sentence in sentences:
97
- # Here you can apply any structuring rules or logic you need.
98
- structured_sentences.append(sentence)
99
-
100
- structured_text = ' '.join(structured_sentences)
101
- return structured_text
102
 
103
- # Combined function: Paraphrase -> Structure -> Grammar Check
104
- def humanize_text(text):
105
- # Step 1: Paraphrase
106
  paraphrased_text = paraphrase_with_spacy_nltk(text)
107
 
108
- # Step 2: Structure sentences
109
- structured_text = structure_sentences(paraphrased_text)
110
 
111
- # Step 3: Apply grammar correction
112
- final_text = correct_grammar_with_languagetool(structured_text)
113
-
114
- return final_text
115
 
116
  # Gradio interface definition
117
  with gr.Blocks() as interface:
@@ -119,12 +99,12 @@ with gr.Blocks() as interface:
119
  with gr.Column():
120
  text_input = gr.Textbox(lines=5, label="Input Text")
121
  detect_button = gr.Button("AI Detection")
122
- humanize_button = gr.Button("Humanize Text")
123
  with gr.Column():
124
  output_text = gr.Textbox(label="Output")
125
 
126
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
127
- humanize_button.click(humanize_text, inputs=text_input, outputs=output_text)
128
 
129
  # Launch the Gradio app
130
  interface.launch(debug=False)
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
9
+ import language_tool_python # Import the grammar checking library
 
 
 
 
 
 
 
10
 
11
  # Ensure necessary NLTK data is downloaded
 
12
  nltk.download('wordnet')
13
  nltk.download('omw-1.4')
 
14
 
15
  # Ensure the spaCy model is installed
16
  try:
 
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
30
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
31
 
 
 
 
 
 
 
 
32
  # AI detection function using DistilBERT
33
  def detect_ai_generated(text):
34
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
 
46
  return [lemma.name() for lemma in lemmas]
47
  return []
48
 
49
+ # Paraphrasing function using spaCy and NLTK (without grammar correction)
50
  def paraphrase_with_spacy_nltk(text):
51
  doc = nlp(text)
52
  paraphrased_words = []
53
 
54
  for token in doc:
55
+ # Map spaCy POS tags to WordNet POS tags
56
  pos = None
57
  if token.pos_ in {"NOUN"}:
58
  pos = wordnet.NOUN
 
65
 
66
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
67
 
68
+ # Replace with a synonym only if it makes sense
69
  if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
70
  paraphrased_words.append(synonyms[0])
71
  else:
72
  paraphrased_words.append(token.text)
73
 
74
+ # Join the words back into a sentence
75
  paraphrased_sentence = ' '.join(paraphrased_words)
76
+
77
  return paraphrased_sentence
78
 
79
+ # Grammar correction function using LanguageTool
80
+ def correct_grammar(text):
81
+ tool = language_tool_python.LanguageTool('en-US')
82
+ matches = tool.check(text)
83
+ corrected_text = language_tool_python.utils.correct(text, matches)
84
+ return corrected_text
 
 
 
 
 
85
 
86
+ # Combined function: Paraphrase -> Grammar Check
87
+ def paraphrase_and_correct(text):
88
+ # Step 1: Paraphrase the text
89
  paraphrased_text = paraphrase_with_spacy_nltk(text)
90
 
91
+ # Step 2: Apply grammar correction
92
+ corrected_text = correct_grammar(paraphrased_text)
93
 
94
+ return corrected_text
 
 
 
95
 
96
  # Gradio interface definition
97
  with gr.Blocks() as interface:
 
99
  with gr.Column():
100
  text_input = gr.Textbox(lines=5, label="Input Text")
101
  detect_button = gr.Button("AI Detection")
102
+ paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
103
  with gr.Column():
104
  output_text = gr.Textbox(label="Output")
105
 
106
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
107
+ paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
108
 
109
  # Launch the Gradio app
110
  interface.launch(debug=False)