Towhidul committed on
Commit
96109b6
·
verified ·
1 Parent(s): 9ffa87f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -39
app.py CHANGED
@@ -1,16 +1,48 @@
1
  import streamlit as st
 
 
2
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from colorama import Fore, Style
 
 
5
 
6
- # Initialize tokenizer and model
7
  para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
8
  para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
9
 
10
- def paraphrase(question, num_beams=5, num_beam_groups=5, num_return_sequences=5,
11
- repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2,
12
- temperature=0.7, max_length=64):
13
- # Tokenize input and generate paraphrases
 
 
 
 
 
 
 
14
  input_ids = para_tokenizer(
15
  f'paraphrase: {question}',
16
  return_tensors="pt", padding="longest",
@@ -25,11 +57,11 @@ def paraphrase(question, num_beams=5, num_beam_groups=5, num_return_sequences=5,
25
  max_length=max_length, diversity_penalty=diversity_penalty
26
  )
27
 
28
- # Decode paraphrases
29
  res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True)
30
 
31
  return res
32
 
 
33
  def find_longest_common_sequences(main_sentence, paraphrases):
34
  main_tokens = main_sentence.split()
35
  common_sequences = set()
@@ -53,63 +85,56 @@ def find_longest_common_sequences(main_sentence, paraphrases):
53
 
54
  return sorted(common_sequences, key=len, reverse=True)
55
 
56
def highlight_paraphrases(main_sentence, paraphrases):
    """Colour the sequences shared with ``main_sentence`` inside each paraphrase.

    The shared sequences come from ``find_longest_common_sequences``; the
    sequence at position ``i`` of that result is given the ``i``-th colorama
    foreground colour (cycling through the palette), and every coloured run
    is terminated with ``Style.RESET_ALL``.

    Args:
        main_sentence: The reference sentence.
        paraphrases: Iterable of paraphrase strings to annotate.

    Returns:
        A new list with one annotated string per paraphrase, in input order.
    """
    longest_common_sequences = find_longest_common_sequences(main_sentence, paraphrases)
    color_palette = [Fore.RED, Fore.GREEN, Fore.BLUE, Fore.MAGENTA, Fore.CYAN]

    # Map each non-empty sequence to its colour, keeping the order in which
    # the sequences were returned so that order also drives regex precedence.
    color_for = {}
    for i, sequence in enumerate(longest_common_sequences):
        if sequence:
            color_for.setdefault(sequence, color_palette[i % len(color_palette)])

    if not color_for:
        return list(paraphrases)

    # One alternation, one pass: text that has already been coloured is never
    # rescanned, so a short sequence contained in a longer one can no longer
    # be wrapped a second time and reset the outer colour half-way through.
    pattern = re.compile("|".join(re.escape(sequence) for sequence in color_for))

    def colorize(match):
        text = match.group()
        return f"{color_for[text]}{text}{Style.RESET_ALL}"

    return [pattern.sub(colorize, para) for para in paraphrases]
72
-
73
import html  # local to this script section: used to escape text before it is rendered as HTML

# Example prompts offered in the dropdown.
prompt_list = [
    "The official position of the United States on the Russia-Ukraine war has been consistent in supporting Ukraine's sovereignty, territorial integrity, and the peaceful resolution of the conflict.",
    "Joe Biden said we’d not send U.S. troops to fight Russian troops in Ukraine, but we would provide robust military assistance and try to unify the Western world against Russia’s aggression.",
]

options = [f"Prompt #{i+1}: {prompt_list[i]}" for i in range(len(prompt_list))] + ["Another Prompt..."]
selection = st.selectbox("Choose a prompt from the dropdown below. Click on 'Another Prompt...' if you want to enter your own custom prompt.", options=options)

if selection == "Another Prompt...":
    custom_prompt = st.text_input("Enter your custom prompt...")
    if not custom_prompt.strip():
        # Nothing has been typed yet: end this script run here instead of
        # sending a blank prompt to the paraphrase model.
        st.stop()
    # The leading space mirrors the text that follows "Prompt #N:" in a preset.
    check = " " + custom_prompt
    st.caption(f" Your input prompt is: {check}")
    st.caption('🟢 Kindly hold on for a few minutes while the AI text is being generated.')
else:
    # Drop the "Prompt #N:" label and keep only the prompt text.
    check = re.split(r'#\d+:', selection, maxsplit=1)[1]
    st.caption(f" Your input prompt is: {check}")
    st.caption('🟢 Kindly hold on for a few minutes while the Paraphrase texts are being generated.')

main_sentence = check

st.markdown("**Main Sentence:**")
st.write(main_sentence)

# Generate paraphrases
paraphrases = paraphrase(main_sentence)

# Highlight paraphrases (highlight_paraphrases wraps matches in colorama codes)
highlighted_paraphrases = highlight_paraphrases(main_sentence, paraphrases)

# Display paraphrases with numbers
st.markdown("**Paraphrases:**")
for i, para in enumerate(highlighted_paraphrases, 1):
    st.write(f"Paraphrase {i}:")
    st.write(para)

st.markdown("**Main sentence with highlighted longest common sequences:**")

# CSS colour names for the HTML view. The palette inside highlight_paraphrases
# is a local variable holding terminal escape codes, so it is neither in scope
# here nor usable as a CSS colour value.
html_palette = ["red", "green", "blue", "magenta", "cyan"]
color_for = {}
for i, sequence in enumerate(find_longest_common_sequences(main_sentence, paraphrases)):
    if sequence:
        color_for.setdefault(sequence, html_palette[i % len(html_palette)])

if color_for:
    # Splitting on a capturing group yields plain text at even indices and
    # matched sequences at odd indices; each piece is escaped exactly once and
    # inserted markup is never rescanned by a later replacement.
    splitter = re.compile("(" + "|".join(re.escape(sequence) for sequence in color_for) + ")")
    highlighted_sentence = "".join(
        f"<span style='color: {color_for[piece]}'>{html.escape(piece)}</span>"
        if index % 2
        else html.escape(piece)
        for index, piece in enumerate(splitter.split(main_sentence))
    )
else:
    highlighted_sentence = html.escape(main_sentence)

st.markdown(highlighted_sentence, unsafe_allow_html=True)
 
 
 
1
  import streamlit as st
2
+ import plotly.graph_objects as go
3
+ from transformers import pipeline
4
  import re
5
+ import time
6
+ import requests
7
+ from PIL import Image
8
+ import itertools
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ from matplotlib.colors import rgb2hex
12
+ import matplotlib
13
+ from matplotlib.colors import ListedColormap, rgb2hex
14
+ import ipywidgets as widgets
15
+ from IPython.display import display, HTML
16
+ import re
17
+ import pandas as pd
18
+ from pprint import pprint
19
+ from tenacity import retry
20
+ from tqdm import tqdm
21
+ # import tiktoken
22
+ import scipy.stats
23
+ import torch
24
+ from transformers import GPT2LMHeadModel
25
+ # import tiktoken
26
+ import seaborn as sns
27
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
28
  from colorama import Fore, Style
29
+ # import openai
30
+
31
 
 
32
  para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
33
  para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
34
 
35
+ def paraphrase(
36
+ question,
37
+ num_beams=5,
38
+ num_beam_groups=5,
39
+ num_return_sequences=5,
40
+ repetition_penalty=10.0,
41
+ diversity_penalty=3.0,
42
+ no_repeat_ngram_size=2,
43
+ temperature=0.7,
44
+ max_length=64 #128
45
+ ):
46
  input_ids = para_tokenizer(
47
  f'paraphrase: {question}',
48
  return_tensors="pt", padding="longest",
 
57
  max_length=max_length, diversity_penalty=diversity_penalty
58
  )
59
 
 
60
  res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True)
61
 
62
  return res
63
 
64
+
65
  def find_longest_common_sequences(main_sentence, paraphrases):
66
  main_tokens = main_sentence.split()
67
  common_sequences = set()
 
85
 
86
  return sorted(common_sequences, key=len, reverse=True)
87
 
 
 
 
88
 
 
 
 
89
 
 
 
 
 
 
 
90
 
91
import html  # local to this script section: used to escape text before it is rendered as HTML

# Example prompts offered in the dropdown.
prompt_list = [
    "The official position of the United States on the Russia-Ukraine war has been consistent in supporting Ukraine's sovereignty, territorial integrity, and the peaceful resolution of the conflict.",
    "Joe Biden said we’d not send U.S. troops to fight Russian troops in Ukraine, but we would provide robust military assistance and try to unify the Western world against Russia’s aggression.",
]

options = [f"Prompt #{i+1}: {prompt_list[i]}" for i in range(len(prompt_list))] + ["Another Prompt..."]
selection = st.selectbox("Choose a prompt from the dropdown below . Click on :blue['Another Prompt...'] , if you want to enter your own custom prompt.", options=options)

if selection == "Another Prompt...":
    custom_prompt = st.text_input("Enter your custom prompt...")
    if not custom_prompt.strip():
        # Nothing has been typed yet: end this script run here instead of
        # sending a blank prompt to the paraphrase model.
        st.stop()
    # The leading space mirrors the text that follows "Prompt #N:" in a preset.
    check = " " + custom_prompt
    st.caption(f""":white_check_mark: Your input prompt is : {check}""")
    st.caption(':green[Kindly hold on for a few minutes while the AI text is being generated]')
else:
    # Drop the "Prompt #N:" label and keep only the prompt text.
    check = re.split(r'#\d+:', selection, maxsplit=1)[1]
    st.caption(f""":white_check_mark: Your input prompt is : {check}""")
    st.caption(':green[Kindly hold on for a few minutes while the Paraphrase texts are being generated]')

main_sentence = check

st.markdown("**Main Sentence**:")
st.write(main_sentence)

# Generate paraphrases
paraphrases = paraphrase(main_sentence)

# Extracting longest common sequences
longest_common_sequences = find_longest_common_sequences(main_sentence, paraphrases)

color_palette = ["#FF0000", "#008000", "#0000FF", "#FF00FF", "#00FFFF"]


def _highlight_sequences(sentence, sequences, palette):
    """Return ``sentence`` as HTML with each listed sequence wrapped in a coloured span.

    Args:
        sentence: Text to annotate.
        sequences: Strings to highlight; the one at position ``i`` gets
            ``palette[i % len(palette)]``. Earlier entries take precedence
            where two of them could match at the same position.
        palette: Non-empty list of CSS colour values.

    Returns:
        An HTML string in which all text taken from ``sentence`` is escaped.
    """
    color_for = {}
    for i, sequence in enumerate(sequences):
        if sequence:  # an empty sequence would match between every character
            color_for.setdefault(sequence, palette[i % len(palette)])
    if not color_for:
        return html.escape(sentence)

    # A single left-to-right scan: inserted markup is never searched again, so
    # a sequence such as "color" or "span" cannot corrupt an earlier tag.
    pattern = re.compile("|".join(re.escape(sequence) for sequence in color_for))
    pieces = []
    cursor = 0
    for match in pattern.finditer(sentence):
        found = match.group()
        pieces.append(html.escape(sentence[cursor:match.start()]))
        pieces.append(f"<span style='color:{color_for[found]}'>{html.escape(found)}</span>")
        cursor = match.end()
    pieces.append(html.escape(sentence[cursor:]))
    return "".join(pieces)


highlighted_sentence = _highlight_sequences(main_sentence, longest_common_sequences, color_palette)

# Display paraphrases with numbers
st.markdown("**Paraphrases**:")
for i, para in enumerate(paraphrases, 1):
    st.write(f"Paraphrase {i}:")
    st.write(para)

# Displaying the main sentence with highlighted longest common sequences
st.markdown("**Main sentence with highlighted longest common sequences**:")
st.markdown(highlighted_sentence, unsafe_allow_html=True)