"""Flask front end for a Parrot-style paraphrasing pipeline.

The pipeline generates candidate paraphrases with a seq2seq model, filters
them by adequacy and fluency classifiers, ranks the survivors by a diversity
metric, and serves the result through a single-page Flask app.
"""
import re

import nltk
import torch
from flask import Flask, render_template_string, request
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Import-time side effect kept from the original: sent_tokenize needs 'punkt'.
nltk.download('punkt')
from nltk import sent_tokenize  # noqa: E402  (must follow the download above)


class Adequacy:
    """Filter paraphrases by the adequacy classifier's score against the input."""

    def __init__(self, model_tag='prithivida/parrot_adequacy_model'):
        self.adequacy_model = AutoModelForSequenceClassification.from_pretrained(model_tag)
        self.tokenizer = AutoTokenizer.from_pretrained(model_tag)

    def filter(self, input_phrase, para_phrases, adequacy_threshold, device="cpu"):
        """Return the paraphrases whose adequacy score meets the threshold.

        Args:
            input_phrase: The original sentence.
            para_phrases: Iterable of candidate paraphrases.
            adequacy_threshold: Minimum softmax probability of class index 1.
            device: Torch device string the model and inputs are moved to.

        Returns:
            List of paraphrases with score >= ``adequacy_threshold``, in
            iteration order.
        """
        # Moving the model is loop-invariant, so do it once up front.
        self.adequacy_model = self.adequacy_model.to(device)
        top_adequacy_phrases = []
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for para_phrase in para_phrases:
                encoded = self.tokenizer(
                    input_phrase,
                    para_phrase,
                    return_tensors='pt',
                    max_length=128,
                    truncation=True,
                ).to(device)
                logits = self.adequacy_model(**encoded).logits
                # Column 1 is used as the probability that the pair is adequate.
                adequacy_score = logits.softmax(dim=1)[:, 1].item()
                if adequacy_score >= adequacy_threshold:
                    top_adequacy_phrases.append(para_phrase)
        return top_adequacy_phrases


class Fluency:
    """Filter paraphrases by the fluency classifier's score."""

    def __init__(self, model_tag='prithivida/parrot_fluency_model'):
        self.fluency_model = AutoModelForSequenceClassification.from_pretrained(
            model_tag, num_labels=2
        )
        self.fluency_tokenizer = AutoTokenizer.from_pretrained(model_tag)

    def filter(self, para_phrases, fluency_threshold, device="cpu"):
        """Return the paraphrases whose fluency score meets the threshold.

        Args:
            para_phrases: Iterable of candidate paraphrases.
            fluency_threshold: Minimum softmax probability of LABEL_1.
            device: Torch device string the model and inputs are moved to.

        Returns:
            List of paraphrases with score >= ``fluency_threshold``, in
            iteration order.
        """
        from scipy.special import softmax

        self.fluency_model = self.fluency_model.to(device)
        top_fluent_phrases = []
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for para_phrase in para_phrases:
                encoded = self.fluency_tokenizer(
                    "Sentence: " + para_phrase, return_tensors='pt', truncation=True
                ).to(device)
                prediction = self.fluency_model(**encoded)
                scores = softmax(prediction[0][0].detach().cpu().numpy())
                fluency_score = scores[1]  # LABEL_0 = Bad Fluency, LABEL_1 = Good Fluency
                if fluency_score >= fluency_threshold:
                    top_fluent_phrases.append(para_phrase)
        return top_fluent_phrases


class Diversity:
    """Score how different each paraphrase is from the input phrase."""

    def __init__(self, model_tag='paraphrase-distilroberta-base-v2'):
        from sentence_transformers import SentenceTransformer

        self.diversity_model = SentenceTransformer(model_tag)

    def rank(self, input_phrase, para_phrases, diversity_ranker='levenshtein'):
        """Dispatch to the named ranker and return ``{paraphrase: score}``.

        Raises:
            ValueError: If ``diversity_ranker`` is not one of
                ``"levenshtein"``, ``"euclidean"`` or ``"diff"``. (Previously
                this silently returned ``None`` and callers crashed later.)
        """
        rankers = {
            "levenshtein": self.levenshtein_ranker,
            "euclidean": self.euclidean_ranker,
            "diff": self.diff_ranker,
        }
        try:
            ranker = rankers[diversity_ranker]
        except KeyError:
            raise ValueError(
                f"Unknown diversity_ranker {diversity_ranker!r}; "
                f"expected one of {sorted(rankers)}"
            ) from None
        return ranker(input_phrase, para_phrases)

    def euclidean_ranker(self, input_phrase, para_phrases):
        """Score by min-max-scaled Euclidean distance between sentence embeddings."""
        import pandas as pd
        from scipy import spatial
        from sklearn.preprocessing import MinMaxScaler
        from sklearn_pandas import DataFrameMapper

        # Nothing to scale; avoids fitting the scaler on zero rows.
        if not para_phrases:
            return {}

        input_enc = self.diversity_model.encode(input_phrase.lower())
        outputs = []
        for para_phrase in para_phrases:
            paraphrase_enc = self.diversity_model.encode(para_phrase.lower())
            euclidean_distance = spatial.distance.euclidean(input_enc, paraphrase_enc)
            outputs.append((para_phrase, euclidean_distance))

        df = pd.DataFrame(outputs, columns=['paraphrase', 'scores'])
        # Scale only the numeric column; pass the text column through untouched.
        fields = [
            ([col], MinMaxScaler() if col == "scores" else None)
            for col in df.columns
        ]
        mapper = DataFrameMapper(fields, df_out=True)

        diversity_scores = {}
        for _, row in mapper.fit_transform(df.copy()).iterrows():
            diversity_scores[row['paraphrase']] = row['scores']
        return diversity_scores

    def levenshtein_ranker(self, input_phrase, para_phrases):
        """Score by Levenshtein edit distance from the lowercased input."""
        import Levenshtein

        # NOTE(review): only the input is lowercased here. Paraphrases coming
        # from Parrot are already lowercased by Parrot._clean_text; other
        # callers should lowercase theirs for a like-for-like comparison.
        lowered_input = input_phrase.lower()
        return {
            para_phrase: Levenshtein.distance(lowered_input, para_phrase)
            for para_phrase in para_phrases
        }

    def diff_ranker(self, input_phrase, para_phrases):
        """Score by the number of whitespace tokens added or removed."""
        import difflib

        differ = difflib.Differ()
        input_tokens = input_phrase.split()
        diversity_scores = {}
        for para_phrase in para_phrases:
            diff = differ.compare(input_tokens, para_phrase.split())
            # Count only real additions/removals. Testing for "+"/"-" anywhere
            # in the line also matched unchanged hyphenated tokens and the
            # "? " hint lines that Differ emits.
            diversity_scores[para_phrase] = sum(
                1 for line in diff if line.startswith(("+ ", "- "))
            )
        return diversity_scores


class Parrot:
    """Generate, filter and rank paraphrases for sentences and essays."""

    # Everything except letters, digits, space and ? ' - / : . is stripped.
    # Raw string: the original non-raw literal relied on invalid escape
    # sequences, which newer Python versions warn about.
    _UNWANTED_CHARS = re.compile(r"[^a-zA-Z0-9 \?'\-\/\:\.]")

    def __init__(self, model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False):
        self.tokenizer = AutoTokenizer.from_pretrained(model_tag, use_auth_token=False)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_tag, use_auth_token=False)
        self.adequacy_score = Adequacy()
        self.fluency_score = Fluency()
        self.diversity_score = Diversity()
        self.device = "cuda:0" if use_gpu and torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def _clean_text(self, text):
        """Strip unwanted characters from ``text`` and lowercase it."""
        return self._UNWANTED_CHARS.sub('', text).lower()

    @staticmethod
    def _pick_beam_groups(num_beams):
        """Return a group count that evenly divides ``num_beams``.

        Prefers the smallest divisor in 2..8. When none exists (e.g. 11),
        falls back to one group per beam so that ``num_beams`` stays divisible
        by the group count.

        Raises:
            ValueError: If ``num_beams`` is below 2, since diverse beam search
                needs at least two groups.
        """
        if num_beams < 2:
            raise ValueError(
                "do_diverse=True requires max_return_phrases >= 2, "
                f"got {num_beams}"
            )
        for groups in range(2, 9):
            if num_beams % groups == 0:
                return groups
        return num_beams

    def _generate_paraphrases(self, input_phrase, max_length, max_return_phrases, do_diverse):
        """Generate a set of cleaned candidate paraphrases for ``input_phrase``.

        Uses diverse (grouped) beam search when ``do_diverse`` is true,
        otherwise top-k / top-p sampling.
        """
        input_phrase = self._clean_text(input_phrase)
        input_ids = self.tokenizer.encode(
            "paraphrase: " + input_phrase, return_tensors='pt'
        ).to(self.device)

        if do_diverse:
            preds = self.model.generate(
                input_ids,
                do_sample=False,
                max_length=max_length,
                num_beams=max_return_phrases,
                num_beam_groups=self._pick_beam_groups(max_return_phrases),
                diversity_penalty=2.0,
                early_stopping=True,
                num_return_sequences=max_return_phrases,
            )
        else:
            preds = self.model.generate(
                input_ids,
                do_sample=True,
                max_length=max_length,
                top_k=50,
                top_p=0.95,
                early_stopping=True,
                num_return_sequences=max_return_phrases,
            )

        # A set removes duplicate generations before the classifiers run.
        paraphrases = {
            self.tokenizer.decode(pred, skip_special_tokens=True) for pred in preds
        }
        return self._clean_paraphrases(paraphrases)

    def _clean_paraphrases(self, paraphrases):
        """Return the set of paraphrases after ``_clean_text`` normalisation."""
        return {self._clean_text(phrase) for phrase in paraphrases}

    def _filter_and_rank_paraphrases(self, input_phrase, paraphrases, adequacy_threshold,
                                     fluency_threshold, diversity_ranker):
        """Filter by adequacy then fluency, and rank survivors by diversity.

        Returns:
            List of ``(paraphrase, diversity_score)`` tuples sorted by score,
            highest first; empty if nothing survives either filter.
        """
        adequacy_filtered_phrases = self.adequacy_score.filter(
            input_phrase, paraphrases, adequacy_threshold, self.device
        )
        if not adequacy_filtered_phrases:
            return []

        fluency_filtered_phrases = self.fluency_score.filter(
            adequacy_filtered_phrases, fluency_threshold, self.device
        )
        if not fluency_filtered_phrases:
            return []

        diversity_scored_phrases = self.diversity_score.rank(
            input_phrase, fluency_filtered_phrases, diversity_ranker
        )
        return sorted(
            diversity_scored_phrases.items(), key=lambda item: item[1], reverse=True
        )

    def paraphrase_sentence(self, sentence, diversity_ranker="levenshtein", do_diverse=False,
                            max_length=512, adequacy_threshold=0.90, fluency_threshold=0.90,
                            max_return_phrases=10):
        """Return ranked ``(paraphrase, score)`` tuples for a single sentence."""
        paraphrases = self._generate_paraphrases(
            sentence, max_length, max_return_phrases, do_diverse
        )
        return self._filter_and_rank_paraphrases(
            sentence, paraphrases, adequacy_threshold, fluency_threshold, diversity_ranker
        )

    def paraphrase_essay(self, essay, diversity_ranker="levenshtein", do_diverse=False,
                         max_length=512, adequacy_threshold=0.90, fluency_threshold=0.90,
                         max_return_phrases=10):
        """Paraphrase an essay sentence by sentence and join the results.

        Each sentence is replaced by its top-ranked paraphrase. A sentence for
        which no paraphrase passes the filters is kept unchanged, so the
        output never silently loses content.
        """
        paraphrased_sentences = []
        for sentence in sent_tokenize(essay):
            ranked = self.paraphrase_sentence(
                sentence, diversity_ranker, do_diverse, max_length,
                adequacy_threshold, fluency_threshold, max_return_phrases,
            )
            paraphrased_sentences.append(ranked[0][0] if ranked else sentence)
        return ' '.join(paraphrased_sentences)


# Flask app setup
app = Flask(__name__)
parrot_instance = Parrot(use_gpu=False)

_DIVERSITY_RANKERS = ('levenshtein', 'euclidean', 'diff')
_DEFAULT_DIVERSITY_RANKER = 'levenshtein'
_DEFAULT_FLUENCY_THRESHOLD = 0.9


def _parse_threshold(raw, default=_DEFAULT_FLUENCY_THRESHOLD):
    """Parse a form value as a probability in [0, 1], else return ``default``.

    Non-numeric, NaN/inf and out-of-range values all fall back to the default
    so that a malformed form field cannot produce an HTTP 500 or silently
    filter every paraphrase.
    """
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return default
    # NaN fails both comparisons, so it is rejected here as well.
    return value if 0.0 <= value <= 1.0 else default


@app.route('/', methods=['GET', 'POST'])
def home():
    """Render the page; on POST, paraphrase the submitted essay first."""
    paraphrased_essay = ""

    # Form fields are untrusted: validate before handing them to the pipeline.
    diversity_ranker = request.form.get('diversity_ranker', _DEFAULT_DIVERSITY_RANKER)
    if diversity_ranker not in _DIVERSITY_RANKERS:
        diversity_ranker = _DEFAULT_DIVERSITY_RANKER
    fluency_threshold = _parse_threshold(request.form.get('fluency_threshold'))

    if request.method == 'POST':
        essay = request.form.get('essay')
        if essay:
            paraphrased_essay = parrot_instance.paraphrase_essay(
                essay,
                diversity_ranker=diversity_ranker,
                fluency_threshold=fluency_threshold,
            )

    # NOTE(review): the template below contains no HTML markup or <form>
    # element in this copy of the file - presumably stripped somewhere
    # upstream. Restore the markup before deploying; the text is kept as-is.
    return render_template_string(""" Paraphrase Your Essay
NexByte Logo

Paraphrase Your Essay

Humanize AI Text with the Best Paraphrasing Tool

{{ fluency_threshold }}
{% if paraphrased_essay %}

Paraphrased Essay

{{ paraphrased_essay }}
{% endif %}
Subscribe Now Contact Us

What is NexByte's Paraphrasing Tool?

The NexByte Paraphrasing Tool is an innovative online tool for converting AI-generated content into human-like writing. This programme, also known as the NexByte AI Text Converter, efficiently rewrites content written by AI writers such as ChatGPT, Google Bard, Microsoft Bing, Claude, QuillBot, Grammarly, Jasper.ai, Copy.ai, and any other AI text generator. It ensures that the text is free of robotic tones, rendering it indistinguishable from human writing.

Our application employs advanced proprietary algorithms to preserve the original content and context of the text while improving readability and Search Engine Optimisation (SEO) potential. The content created with NexByte Paraphrasing Tool is completely plagiarism-free and undetectable by all existing AI detectors on the market.

What Does "Paraphrasing AI Text" Mean?

Paraphrasing AI text entails transforming AI-generated content into writing that appears more naturally human. This technique entails making the language more interesting, accessible, and clear to human readers while removing any robotic tones.

NexByte's method for humanising AI text includes:

How Can We Paraphrase AI Text Online for Free?

Using the NexByte Paraphrasing Tool is simple and intuitive. Follow these easy steps to turn your AI-generated writing into human-like content:

Voila! You now have content that reads naturally, is free of robotic tones, and is undetectable by AI detection software.

Why Should I Use NexByte Paraphrasing Tool?

NexByte Paraphrasing Tool stands apart because:

Experience the future of content creation with the NexByte Paraphrasing Tool, which effortlessly transforms AI-generated prose into human-like masterpieces.

""",
        paraphrased_essay=paraphrased_essay,
        diversity_ranker=diversity_ranker,
        fluency_threshold=fluency_threshold,
    )


if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug interactive debugger and
    # the reloader. Keep this for local development only; never expose it on
    # a public interface.
    app.run(debug=True)