import gradio as gr
import random
import os
import copy
import torch
from huggingface_hub import login
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel, set_seed
from transformers import AutoTokenizer, AutoModelWithLMHead, AutoModelForSeq2SeqLM
import datetime
import string
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from rake_nltk import Rake
login(os.environ["HF_TOKEN"])
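# login() reads an HF token; in a Space this typically comes from an HF_TOKEN
# secret set in the Space settings. It is presumably needed here to download the
# private fine-tuned checkpoint zmbfeng/untethered_20240225_epochs_500.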
#https://huggingface.co/facebook/opt-1.3b
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
# dt stores the current date and time
dt = datetime.datetime.now()
print(dt)
print("loading models")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
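# The models above back the tabs defined at the bottom of this file:
# bart-large-mnli powers zero-shot intention classification,
# t5-base-finetuned-question-generation-ap turns statements into questions, and
# T5_Paraphrase_Paws rewrites sentences. The DialoGPT tokenizer and the two
# GPT2LMHeadModel checkpoints are loaded but not referenced by the interfaces below.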
# To cache the models locally instead of in the default HF cache, pass cache_dir
# to each from_pretrained call, e.g.:
# tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium', cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
default_temperature = 0.01
default_seed = 43
def create_response_question_generation(input_str, max_length=64):
    # The T5 question-generation model expects "answer: ... context: ..."; here the
    # input statement serves as both the answer and the context.
    input_text = "answer: %s context: %s </s>" % (input_str, input_str)
    print(f"create question input_text={input_text}")
    max_length = int(max_length)  # gr.Number passes a float
    features = question_generation_tokenizer([input_text], return_tensors='pt')
    output = question_generation_model.generate(input_ids=features['input_ids'],
                                                attention_mask=features['attention_mask'],
                                                max_length=max_length)
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)
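# Usage sketch (hypothetical call, outside the Gradio UI):
#   create_response_question_generation("Paris is the capital of France")
# should return something like "question: What is the capital of France?"
# (exact text depends on the model's decoding).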
def create_response_paraphrase(input_str, max_length, num_return_sequences):
    text = "paraphrase: " + input_str + " </s>"
    encoding = paraphrase_tokenizer.encode_plus(text, padding=True, return_tensors="pt")
    input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
    num_return_sequences = int(num_return_sequences)  # gr.Number passes floats
    max_length = int(max_length)
    outputs = paraphrase_model.generate(
        input_ids=input_ids, attention_mask=attention_masks,
        max_length=max_length,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        early_stopping=True,
        num_return_sequences=num_return_sequences,
        repetition_penalty=1.5
    )
    result_output_str = ""
    for output in outputs:
        line = paraphrase_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        result_output_str = result_output_str + line + "<br/>"
    return result_output_str
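# Decoding note: do_sample with top_k=120 and top_p=0.95 draws varied paraphrases
# instead of a single greedy output, and repetition_penalty=1.5 discourages echoing
# the input verbatim; these values are the original script's choices, not tuned ones.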
def contains_digit_or_punctuation(s):
    return any(char.isdigit() or char in string.punctuation for char in s)
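# Example: contains_digit_or_punctuation("state-of-the-art") -> True (hyphens count
# as punctuation), contains_digit_or_punctuation("cosmic paradox") -> False.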
rake = Rake()
def create_response_keywords_extraction(input_str):
    rake.extract_keywords_from_text(input_str)
    keywords_with_scores = rake.get_ranked_phrases_with_scores()
    filtered_keywords = []
    seen_keywords = set()
    for score, keyword in keywords_with_scores:
        # Keep a phrase only if its score is above 1, it contains no digits or
        # punctuation, and it has not already been seen.
        if score > 1 and not contains_digit_or_punctuation(keyword) and keyword not in seen_keywords:
            filtered_keywords.append((score, keyword))
            seen_keywords.add(keyword)
    output_string = ""
    for score, keyword in filtered_keywords:
        output_string = output_string + f"Score: {score}, Keyword: {keyword} <br/>"
    return output_string
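# RAKE note: get_ranked_phrases_with_scores() returns (score, phrase) pairs sorted
# by descending score; a phrase's score is the sum of its word scores (word degree
# divided by word frequency), so longer multi-word phrases tend to rank higher.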
def create_response_intention_classification(input_str):
    labels = ["dialogue", "long content generation"]
    # Zero-shot classification over the two candidate labels
    output_string = ""
    result = classifier(input_str, labels)
    for label, score in zip(result["labels"], result["scores"]):
        output_string = output_string + f"Label: {label}, Score: {score:.4f} <br/>"
    return output_string
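# Zero-shot note: the bart-large-mnli pipeline treats each label as an NLI
# hypothesis ("This example is {label}.") and converts entailment probabilities
# into scores; in the default single-label mode the scores sum to 1 across labels.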
interface_question_generation = gr.Interface(fn=create_response_question_generation,
                                             title="Question Generation",
                                             description="Enter a statement like 'Paris is the capital of France'",
                                             inputs=[
                                                 gr.Textbox(label="input text here", lines=3, value="Paris is the capital of France"),
                                                 gr.Number(label="max length", value=64),
                                             ],
                                             outputs="html")
interface_paraphrase = gr.Interface(fn=create_response_paraphrase,
                                    title="Paraphrase",
                                    description="Paraphrase sentences",
                                    inputs=[
                                        gr.Textbox(label="input text here", lines=3, value="It is truly a great cosmic paradox that one of the best teachers in all of life turns out to be death. No person or situation could ever teach you as much as death has to teach you."),
                                        gr.Number(label="max length", value=512),
                                        gr.Number(label="num of responses", value=2)
                                    ],
                                    outputs="html")
interface_extract_keywords = gr.Interface(fn=create_response_keywords_extraction,
                                          title="Extract Keywords",
                                          description="Extract ranked keywords from the input text",
                                          inputs=[
                                              gr.Textbox(label="input text here", lines=3, value="It is truly a great cosmic paradox that one of the best teachers in all of life turns out to be death. No person or situation could ever teach you as much as death has to teach you."),
                                          ],
                                          outputs="html")
interface_intention_classification = gr.Interface(fn=create_response_intention_classification,
                                                  title="Intention Classification",
                                                  description="Classify whether a question calls for short dialogue or long content generation, e.g. 'How are you?' versus 'What are the implications of quantum computing on global security?' (the difference is not very dramatic as of now)",
                                                  inputs=[
                                                      gr.Textbox(label="input text here", lines=3, value="What are the implications of quantum computing on global security?"),
                                                  ],
                                                  outputs="html")
demo = gr.TabbedInterface([interface_question_generation, interface_paraphrase, interface_extract_keywords, interface_intention_classification],
                          ["Question Generation", "Paraphrase", "Keywords Extraction", "Intention Classification"])
demo.launch()