# Source: chatV/lm/server_lm/ori/serve.py
# Uploaded by: tangmen
# Commit message: add files
# Commit: 113dbd0
import os
import os
from pathlib import Path
import csv
import json
import openai
import time
import pandas as pd
# Set up the OpenAI API client.
# SECURITY: the API key is read from the OPENAI_API_KEY environment variable
# rather than being stored in source control. Any key that was ever committed
# to this file must be treated as leaked and revoked.
api_key = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = api_key
# Completion engine name passed to openai.Completion.create by get_topic_result
model_engine = "text-davinci-003"
import gradio as gr
import time
import argparse
from vllm import LLM, SamplingParams
def parse_args(argv=None):
    """Parse the command-line options of the server.

    Args:
        argv: optional list of argument strings. When ``None`` (the default)
            ``sys.argv[1:]`` is parsed, which is the previous behaviour.

    Returns:
        argparse.Namespace with attributes ``model`` (str or None) and
        ``n_gpu`` (int, default 1).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="model path")
    parser.add_argument("--n_gpu", type=int, default=1, help="number of GPUs")
    return parser.parse_args(argv)
def echo(message, history, system_prompt, temperature, max_tokens):
    """Stream back a summary of the received arguments, one character at a time.

    Yields successively longer prefixes of the summary string, pausing 0.05
    seconds before each one, and stops after ``int(max_tokens)`` prefixes or
    when the whole string has been emitted, whichever comes first.
    ``history`` is accepted but not used.
    """
    reply = f"System prompt: {system_prompt}\n Message: {message}. \n Temperature: {temperature}. \n Max Tokens: {max_tokens}."
    limit = min(len(reply), int(max_tokens))
    shown = 0
    while shown < limit:
        time.sleep(0.05)
        shown += 1
        yield reply[:shown]
def align_data(data):
    """Given dict with lists, creates column-aligned strings

    Adapted from Assignment 3 of CS224N

    Every token is left-justified in a column as wide as the longest token
    found at that position in any sequence, plus one separating space.
    The number of columns is taken from the first sequence in ``data``.

    Args:
        data: (dict) data["x"] = ["I", "love", "you"]
              (dict) data["y"] = ["O", "O", "O"]

    Returns:
        data_aligned: (dict) data_aligned["x"] = "I love you "
                             data_aligned["y"] = "O O    O   "
        An empty ``data`` yields an empty dict.
    """
    if not data:
        # Nothing to align; the original indexed the first key and crashed here.
        return {}

    sequences = list(data.values())
    n_columns = len(sequences[0])
    spacings = [max(len(seq[i]) for seq in sequences) for i in range(n_columns)]

    # ljust(width + 1) pads the token to the column width and adds the
    # single separating space in one step.
    return {
        key: "".join(token.ljust(width + 1) for token, width in zip(seq, spacings))
        for key, seq in data.items()
    }
# Engines already constructed in this process, keyed by model path, so that
# the LLM object is built once instead of on every request.
_LLM_CACHE = {}


def _load_llm(model_path, tensor_parallel_size=1):
    """Return the vLLM engine for ``model_path``, constructing it on first use."""
    engine = _LLM_CACHE.get(model_path)
    if engine is None:
        engine = LLM(model=model_path, tensor_parallel_size=tensor_parallel_size)
        _LLM_CACHE[model_path] = engine
    return engine


def get_llm_result(input_data, input_domain):
    """Generate a reply to ``input_data`` with the locally served vLLM model.

    Args:
        input_data: text submitted by the caller. It is sent to the model as
            the user message (``None`` is treated as an empty message).
        input_domain: accepted so the signature matches ``get_topic_result``;
            not used by this function.

    Returns:
        tuple: ``(response, response)``. The generated text is returned twice
        because callers unpack an (article, title) pair.

    Raises:
        Exception: whatever the model call raises; it is reported on stdout
        and re-raised unchanged.
    """
    # NOTE(review): previously the model was always called with an empty
    # message and input_data was ignored; forwarding the caller's text is
    # assumed to be the intent - confirm against the API consumers.
    message = "" if input_data is None else str(input_data)

    def predict(message, history, system_prompt, temperature, max_tokens):
        """Run one chat turn and return the generated text.

        ``history`` is an iterable of (human, assistant) pairs that are
        replayed before ``message``. ``system_prompt`` is accepted but not
        used: the instruction preamble below is fixed.
        """
        model_path = "/workspaceblobstore/caxu/trained_models/13Bv2_497kcontinueroleplay_dsys_2048_e4_2e_5/checkpoint-75"
        llm = _load_llm(model_path, tensor_parallel_size=1)

        pieces = ["A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. "]
        for human, assistant in history:
            pieces.append('USER: ' + human + ' ASSISTANT: ' + assistant + '</s>')
        pieces.append('USER: ' + message + ' ASSISTANT:')
        instruction = "".join(pieces)

        stop_tokens = ["Question:", "Question", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]
        sampling_params = SamplingParams(temperature=temperature, top_p=1, max_tokens=max_tokens, stop=stop_tokens)
        completions = llm.generate([instruction], sampling_params)

        # A single prompt is submitted; default to "" so an empty result
        # list cannot leave the name unbound.
        generated_text = ""
        for output in completions:
            generated_text = output.outputs[0].text
        return generated_text

    try:
        response = predict(message, [], "", 0.5, 3000)
    except Exception as ex:
        # The old message ("File not exist") was misleading: no file is read here.
        print(f"LLM generation failed: {ex}")
        raise
    print(response)
    return response, response
def _openai_complete(prompt, max_tokens):
    """Request one completion for ``prompt`` from ``model_engine`` and return its text."""
    completion = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return completion.choices[0].text


def get_topic_result(input_data, input_domain):
    """Write an article for a title and then a headline for that article.

    Two OpenAI completion requests are made: the first asks for an article
    using ``input_data`` as the title and ``input_domain`` as the expert
    style; the second asks for a headline for a condensed copy of that
    article.

    Args:
        input_data: title sentence inserted into the article prompt.
        input_domain: domain name inserted into the article prompt.

    Returns:
        tuple: ``(response, title_response)`` - the raw article text as
        returned by the API and the generated headline text.

    Raises:
        Exception: whatever the OpenAI client raises; it is reported on
        stdout and re-raised unchanged.
    """
    ori_caption = input_data
    prompt = f"{ori_caption},以这个句子为标题写一篇不少于1000字的{input_domain}专家风格的文章。每个段落多加一些细节和故事,增加文章的可读性。"
    try:
        response = _openai_complete(prompt, 3000)

        # Condense the article before using it as a prompt: drop newlines and
        # keep at most the first 3500 space-separated words.
        shorten_response = response.replace("\n", "").strip()
        words = shorten_response.split(" ")
        if len(words) >= 3500:
            # Rejoin with a space; joining with "" removed every space, but
            # only in the truncated case.
            shorten_response = " ".join(words[:3500])

        # Diagnostic output kept from the original implementation.
        print("X"*10)
        print(f"shorten_response is {shorten_response}")
        list_shorten = shorten_response.split(" ")
        print(list_shorten)
        print(f"length is {len(list_shorten)}")

        title_prompt = f"{shorten_response},给这个文章写一个头条号风格的标题。增加标题的吸引力,可读性。"
        title_response = _openai_complete(title_prompt, 200)
        return response, title_response
    except Exception as ex:
        # The old message ("File not exist") was misleading: no file is read here.
        print(f"OpenAI completion failed: {ex}")
        raise
def get_model_api():
    """Return the function that serves API requests.

    Returns:
        function: ``model_api(input_title, input_domain)``.
    """

    def model_api(input_title, input_domain):
        """Run the model on one request and package the result.

        Args:
            input_title: first value submitted to the API, raw string
            input_domain: second value submitted to the API; passed on to
                ``get_llm_result`` together with ``input_title``

        Returns:
            output_data: dict with keys ``"input_title"`` (the title as
            received), ``"output"`` and ``"title_output"`` (the two values
            returned by ``get_llm_result``)
        """
        preds, title_preds = get_llm_result(input_title, input_domain)
        return {"input_title": input_title, "output": preds, "title_output": title_preds}

    return model_api
# config = Config()
# model = NERModel(config)