import gc

import torch
import nltk
from nltk import sent_tokenize
from tqdm import tqdm
import gradio as gr
from peft import PeftModel
from transformers import T5ForConditionalGeneration, T5Tokenizer

nltk.download("punkt")

# Select the compute device
GPU_IDX = 1  # which GPU to use
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
    device = torch.device(f"cuda:{GPU_IDX}")
    print(f"Using GPU: {GPU_IDX}")
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device("cpu")

batch_size = 64

# Configuration for models and their adapters
model_config = {
    "Base Model": "polygraf-ai/poly-humanizer-base",
    "Large Model": "polygraf-ai/poly-humanizer-large",
    "XL Model": "polygraf-ai/poly-humanizer-XL-adapter",
}

# Cache the models and tokenizers up front so each request only runs inference
models, tokenizers = {}, {}
for name, path in model_config.items():
    if name == "XL Model":
        # The XL checkpoint is a LoRA adapter: load the FLAN-T5 XL base model,
        # attach the adapter, and merge it into the weights for faster inference.
        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", torch_dtype=torch.bfloat16).to(device)
        model = PeftModel.from_pretrained(model, path, torch_dtype=torch.bfloat16, is_trainable=False)
        model = model.merge_and_unload()
        models[name] = model
        tokenizers[name] = T5Tokenizer.from_pretrained("google/flan-t5-xl")
    else:
        # Base and Large checkpoints are full fine-tuned models.
        model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
        models[name] = model
        tokenizers[name] = T5Tokenizer.from_pretrained(path)
    print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")


def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
    """Paraphrase a batch of sentences with the given generation parameters."""
    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_length=128,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    return answers


def paraphrase_text(
    text,
    progress=gr.Progress(),
    model_name="Base Model",
    temperature=1.2,
    repetition_penalty=1.0,
    top_k=50,
    length_penalty=1.0,
):
    """
    Paraphrase the input text sentence by sentence, feeding all sentences to the
    model in batches for efficiency. The number of sentences in each paragraph is
    recorded so the paragraph structure can be reconstructed afterwards.
    """
    progress(0, desc="Starting to Humanize")
    progress(0.05)

    # Select the model and tokenizer
    tokenizer = tokenizers[model_name]
    model = models[model_name].to(device)

    # Split the text into paragraphs and then into sentences
    paragraphs = text.split("\n")
    all_sentences = []
    sentences_per_paragraph = []
    for paragraph in paragraphs:
        sentences = sent_tokenize(paragraph)
        sentences_per_paragraph.append(len(sentences))
        all_sentences.extend(sentences)

    # Process all sentences in batches
    paraphrased_sentences = []
    for i in range(0, len(all_sentences), batch_size):
        batch_sentences = all_sentences[i : i + batch_size]
        paraphrased_batch = paraphrase_sentences(
            model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
        )
        paraphrased_sentences.extend(paraphrased_batch)
        # Clear memory between batches
        torch.cuda.empty_cache()
        gc.collect()

    # Reconstruct paragraphs from the stored per-paragraph sentence counts
    humanized_paragraphs = []
    sentence_index = 0
    for num_sentences in sentences_per_paragraph:
        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
        humanized_paragraphs.append(humanized_paragraph)
        sentence_index += num_sentences
    humanized_text = "\n".join(humanized_paragraphs)
    return humanized_text