|
import streamlit as st |
|
import plotly.graph_objects as go |
|
from transformers import pipeline |
|
import re |
|
import time |
|
import requests |
|
from PIL import Image |
|
import itertools |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from matplotlib.colors import rgb2hex |
|
import matplotlib |
|
from matplotlib.colors import ListedColormap, rgb2hex |
|
import ipywidgets as widgets |
|
from IPython.display import display, HTML |
|
import re |
|
import pandas as pd |
|
from pprint import pprint |
|
from tenacity import retry |
|
from tqdm import tqdm |
|
|
|
import scipy.stats |
|
import torch |
|
from transformers import GPT2LMHeadModel |
|
|
|
import seaborn as sns |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
from colorama import Fore, Style |
|
|
|
|
|
|
|
# Checkpoint identifier passed to both from_pretrained calls below, so the
# tokenizer and the seq2seq model are always loaded from the same source.
PARAPHRASER_CHECKPOINT = "humarin/chatgpt_paraphraser_on_T5_base"

# Module-level objects used by paraphrase().
para_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASER_CHECKPOINT)
para_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASER_CHECKPOINT)
|
|
|
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=64
):
    """Generate paraphrases of *question* with the module-level model.

    The text is prefixed with ``'paraphrase: '``, tokenized with
    ``para_tokenizer`` (truncated to ``max_length``) and passed to
    ``para_model.generate``. The generated ids are decoded with special
    tokens skipped.

    Args:
        question: Text to paraphrase.
        num_beams, num_beam_groups, num_return_sequences,
        repetition_penalty, diversity_penalty, no_repeat_ngram_size,
        temperature: Forwarded unchanged to ``para_model.generate``.
        max_length: Used both as the tokenizer truncation limit for the
            input and as the ``max_length`` argument of ``generate``.

    Returns:
        The result of ``para_tokenizer.batch_decode`` on the generated
        output (the decoded paraphrase strings).
    """
    encoded_prompt = para_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )

    # Collect the generation settings in one place before the call.
    generation_settings = {
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "num_return_sequences": num_return_sequences,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "num_beams": num_beams,
        "num_beam_groups": num_beam_groups,
        "max_length": max_length,
        "diversity_penalty": diversity_penalty,
    }
    generated_ids = para_model.generate(
        encoded_prompt.input_ids, **generation_settings
    )

    return para_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
|
|
|
|
def _contains_token_run(container, run):
    """Return True if the token tuple *run* occurs contiguously in *container*."""
    width = len(run)
    return any(
        container[start:start + width] == run
        for start in range(len(container) - width + 1)
    )


def find_longest_common_sequences(main_sentence, paraphrases):
    """Find the maximal word runs that *main_sentence* shares with paraphrases.

    Both the main sentence and every paraphrase are split on whitespace.
    A run is a stretch of at least two consecutive tokens that appears
    identically in the main sentence and in at least one paraphrase. Only
    maximal runs are returned: a run that is fully contained (token by
    token) in another returned run is dropped, regardless of the order in
    which the runs were discovered.

    Args:
        main_sentence: The reference sentence.
        paraphrases: Iterable of paraphrase strings to compare against.

    Returns:
        A list of runs, each joined with single spaces, sorted by string
        length in descending order. Ties are broken by comparing the token
        tuples, so the output is deterministic across runs.
    """
    main_tokens = main_sentence.split()
    candidates = set()

    for candidate_text in paraphrases:
        other_tokens = candidate_text.split()
        for i in range(len(main_tokens)):
            for j in range(len(other_tokens)):
                # Extend the match starting at (i, j) as far as it goes.
                length = 0
                while (
                    i + length < len(main_tokens)
                    and j + length < len(other_tokens)
                    and main_tokens[i + length] == other_tokens[j + length]
                ):
                    length += 1

                # Single-word matches are not considered a sequence.
                if length > 1:
                    candidates.add(tuple(main_tokens[i:i + length]))

    # Longest first, so every run that could contain a later one has already
    # been kept (or is itself contained in something kept) when we reach it.
    ranked = sorted(candidates, key=lambda run: (-len(' '.join(run)), run))

    maximal_runs = []
    for run in ranked:
        # Compare tokens, not characters: "is apple" must not be treated as
        # part of "this apple".
        if not any(_contains_token_run(kept, run) for kept in maximal_runs):
            maximal_runs.append(run)

    return [' '.join(run) for run in maximal_runs]
|
|
|
|
|
# --- Prompt selection -------------------------------------------------------

# Preset prompts offered in the dropdown; append more strings to extend it.
prompt_list = [
    "The official position of the United States on the Russia-Ukraine war has been consistent in supporting Ukraine's sovereignty, territorial integrity, and the peaceful resolution of the conflict.",
]

# One entry per preset prompt (sized from the list itself so a short list
# cannot raise IndexError), plus an entry for free-form input.
options = [
    f"Prompt #{number}: {prompt}"
    for number, prompt in enumerate(prompt_list, 1)
] + ["Another Prompt..."]
selection = st.selectbox("Choose a prompt from the dropdown below . Click on :blue['Another Prompt...'] , if you want to enter your own custom prompt.", options=options)

if selection == "Another Prompt...":
    custom_prompt = st.text_input("Enter your custom prompt...")
    # Test for emptiness BEFORE adding the leading space; otherwise the
    # value is always truthy and a blank prompt would be sent to the model.
    check = " " + custom_prompt if custom_prompt.strip() else ""
    if check:
        st.caption(f""":white_check_mark: Your input prompt is : {check}""")
        st.caption(':green[Kindly hold on for a few minutes while the AI text is being generated]')
else:
    # Drop the "Prompt #N:" label and keep only the prompt text after it.
    check = re.split(r'#\d+:', selection, maxsplit=1)[1]
    if check:
        st.caption(f""":white_check_mark: Your input prompt is : {check}""")
        st.caption(':green[Kindly hold on for a few minutes while the Paraphrase texts are being generated]')

# Nothing to paraphrase yet (e.g. the custom prompt box is still empty):
# end this script run here instead of calling the model on blank input.
if not check.strip():
    st.stop()

# --- Paraphrase generation --------------------------------------------------

main_sentence = check

st.write("Main Sentence:")
st.write(main_sentence)

paraphrases = paraphrase(main_sentence)

st.write("Paraphrases:")
for i, para in enumerate(paraphrases, 1):
    st.write(f"Paraphrase {i}:")
    st.write(para)

# --- Highlight shared word sequences ----------------------------------------

# This must run after main_sentence and paraphrases exist; computing it
# earlier raises NameError.
longest_common_sequences = find_longest_common_sequences(main_sentence, paraphrases)

# Colours are reused cyclically when there are more sequences than colours.
color_palette = [Fore.RED, Fore.GREEN, Fore.BLUE, Fore.MAGENTA, Fore.CYAN]
highlighted_sentence = main_sentence
for i, sequence in enumerate(longest_common_sequences):
    color = color_palette[i % len(color_palette)]
    highlighted_sentence = highlighted_sentence.replace(sequence, f"{color}{sequence}{Style.RESET_ALL}")

# NOTE(review): colorama emits ANSI terminal escape codes; st.text presumably
# shows them as plain characters rather than colours in the browser - confirm
# and consider a Streamlit-native way of colouring the text.
st.text("Main sentence with highlighted longest common sequences:")
st.text(highlighted_sentence)