"""Gradio chat UI that streams replies from a chat-completions HTTP endpoint.

The endpoint base URL and bearer token are read from the ``API_URL`` and
``API_KEY`` environment variables.
"""

import json
import os
import time

import gradio as gr
import requests

# NOTE(review): MODEL is not referenced anywhere else in this file.
MODEL = "gpt-4-0125-preview"
API_URL = os.getenv("API_URL")
API_KEY = os.getenv("API_KEY")

url = f"{API_URL}/v1/chat/completions"

# The headers for the HTTP request
headers = {
    "accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

# (connect, read) timeouts in seconds, so a dead server cannot hang the UI
# forever. The read timeout applies between received chunks of the stream.
REQUEST_TIMEOUT = (10, 300)

# Prefix and end-of-stream marker used by server-sent-event style responses.
_SSE_PREFIX = "data:"
_SSE_DONE = "[DONE]"


def is_valid_json(data):
    """Try to parse *data* as JSON.

    Returns:
        ``(True, parsed_value)`` when parsing succeeds, otherwise
        ``(False, error_message)``.
    """
    try:
        parsed_data = json.loads(data)
    except (ValueError, TypeError) as e:
        # ValueError covers malformed JSON (json.JSONDecodeError subclasses
        # it); TypeError covers non-string input such as None.
        return False, str(e)
    return True, parsed_data


def _build_messages(history, system_prompt):
    """Convert chatbot history pairs into a list of role/content messages.

    *history* is a sequence of ``[user_text, assistant_text]`` pairs. Empty or
    ``None`` entries are skipped, so the still-unanswered last turn contributes
    only its user message. A non-empty *system_prompt* becomes the first
    message with role ``"system"``.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def _extract_delta(chunk):
    """Return the ``choices[0].delta.content`` text of a parsed chunk, or ""."""
    if not isinstance(chunk, dict):
        return ""
    choices = chunk.get("choices") or [{}]
    first_choice = choices[0]
    if not isinstance(first_choice, dict):
        return ""
    delta = first_choice.get("delta") or {}
    if not isinstance(delta, dict):
        return ""
    return delta.get("content") or ""


def update_globals(
    system_prompt, temperature, max_new_tokens, top_p, repetition_penalty
):
    """Store the given generation settings in module-level ``global_*`` names.

    NOTE(review): nothing in this file wires this function to an event.
    """
    global global_system_prompt, global_temperature, global_max_new_tokens
    global global_top_p, global_repetition_penalty
    global_system_prompt = system_prompt
    global_temperature = temperature
    global_max_new_tokens = max_new_tokens
    global_top_p = top_p
    global_repetition_penalty = repetition_penalty


def user(user_message, history):
    """Append the user's message as a new unanswered turn.

    Returns an empty string (to clear the textbox) and the extended history.
    """
    return "", history + [[user_message, None]]


def bot(
    history, system_prompt, temperature, max_new_tokens, top_p, repetition_penalty
):
    """Stream the assistant reply for the last turn of *history*.

    Posts the conversation to ``url`` with streaming enabled and yields
    *history* after each received content fragment, with the fragment appended
    to the last turn's reply.

    Raises:
        gr.Error: if the HTTP request fails or returns an error status.
    """
    print(f"History in bot: {history}")
    print(f"System Prompt: {system_prompt}")
    print(f"Temperature: {temperature}")
    print(f"Max New Tokens: {max_new_tokens}")
    print(f"Top P: {top_p}")
    print(f"Repetition Penalty: {repetition_penalty}")

    if not history:
        # Nothing to answer; avoid indexing history[-1] below.
        yield history
        return

    # Build the messages before blanking the pending reply so the last turn
    # contributes only its user message.
    history_messages = _build_messages(history, system_prompt)
    history[-1][1] = ""
    print(history_messages)

    payload = {
        "messages": history_messages,
        "stream": True,
        "temperature": temperature,
        "top_k": 50,
        "top_p": top_p,
        "seed": 42,
        "repeat_penalty": repetition_penalty,
        "chat_format": "mistral-instruct",
        "max_tokens": int(max_new_tokens),
        # NOTE(review): this asks the server for JSON-object output on every
        # reply; confirm that is intended for a free-form chat.
        "response_format": {
            "type": "json_object",
        },
    }

    try:
        # Making the POST request and streaming the response; the context
        # manager releases the connection even if the consumer stops early.
        with requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                # Filter out keep-alive new lines
                if not raw_line:
                    continue
                line = raw_line.decode("utf-8").strip()
                # Remove the exact "data:" prefix (str.lstrip would strip a
                # character set, not a prefix).
                if line.startswith(_SSE_PREFIX):
                    line = line[len(_SSE_PREFIX):].strip()
                if line == _SSE_DONE:
                    break
                is_valid, parsed = is_valid_json(line)
                if not is_valid:
                    # Lines that are not JSON carry no content to display.
                    continue
                delta_content = _extract_delta(parsed)
                if delta_content:
                    history[-1][1] += delta_content
                    # Brief pause before each UI update (kept from the
                    # original behavior).
                    time.sleep(0.05)
                    yield history
    except requests.RequestException as err:
        raise gr.Error(f"Request to the model server failed: {err}") from err


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    with gr.Row():
        with gr.Column(scale=4):
            # Define inputs for additional parameters
            system_prompt_input = gr.Textbox(
                label="System Prompt",
                placeholder="Type system prompt here...",
                value="You are a helpful assistant.",
            )
            temperature_input = gr.Slider(
                label="Temperature", minimum=0.0, maximum=1.0, value=0.9, step=0.01
            )
            max_new_tokens_input = gr.Slider(
                label="Max New Tokens", minimum=0, maximum=1024, value=256, step=1
            )
            top_p_input = gr.Slider(
                label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.01
            )
            repetition_penalty_input = gr.Slider(
                label="Repetition Penalty",
                minimum=1.0,
                maximum=2.0,
                value=1.2,
                step=0.01,
            )
        with gr.Column(scale=1):
            markup = gr.Markdown("## Mistral 7B Instruct v0.2 GGUF")

    # First record the user's turn and clear the textbox, then stream the
    # reply into the chatbot.
    msg.submit(
        user, [msg, chatbot], [msg, chatbot], queue=False, concurrency_limit=5
    ).then(
        bot,
        inputs=[
            chatbot,
            system_prompt_input,
            temperature_input,
            max_new_tokens_input,
            top_p_input,
            repetition_penalty_input,
        ],
        outputs=chatbot,
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()

if __name__ == "__main__":
    demo.launch(show_api=False, share=False)