import os
import random
import gradio as gr
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
# os.environ["GROQ_API_KEY"] = os.environ.get("GROQ_API_KEY")
# os.environ["GOOGLE_API_KEY"] = os.environ.get("GOOGLE_API_KEY")
# Initialize memory to manage the chat history, ensuring the AI remembers
# the specified number of past messages (in this case the last 8 exchanges).
memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_messages=True)
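# Note: this memory object lives at module level, so every user session of the app
# shares the same history; per-session memory would have to be created inside the handler.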
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
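    # Route by model name: "gemini-*" models go to Google via ChatGoogleGenerativeAI,
    # all other choices (llama, mixtral, gemma) go to Groq via ChatGroq.
    # Note: the seed value from the UI is currently not forwarded to either client.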
print( "Model =", model)
if model.startswith("gemini"):
chat = ChatGoogleGenerativeAI(
google_api_key = os.environ.get("GOOGLE_API_KEY"),
model = model,
temperature=temperature,
max_tokens=max_tokens,
timeout=None,
max_retries=2,
top_p = top_p
)
else:
chat = ChatGroq(
groq_api_key = os.environ.get("GROQ_API_KEY"),
model_name = model,
temperature=temperature,
max_tokens=max_tokens,
request_timeout=None,
max_retries=2,
top_p = top_p
)
    prompt = ChatPromptTemplate.from_messages(
        [
            # The persistent system prompt that sets the initial context for the AI.
            SystemMessage(content='You are a helpful AI assistant.'),
            # This placeholder takes care of the chat history.
            MessagesPlaceholder(variable_name="chat_history"),
            # The user's current input is injected into the prompt here.
            HumanMessagePromptTemplate.from_template("{human_input}"),
        ]
    )
    # Create a conversation sequence (RunnableSequence) by piping the prompt into the model.
    conversation = prompt | chat
    # Load the chat history from memory.
    chat_history = memory.load_memory_variables({})["chat_history"]
    # The chatbot's answer is generated by sending the full prompt to the LLM.
    response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
    # Update the memory with the new interaction.
    memory.save_context({"input": user_input}, {"output": response.content})
    return response.content
# Define additional inputs and examples if needed
additional_inputs = [
    gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it", "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp"], value="llama-3.1-70b-versatile", label="Model"),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens the model can generate in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama3 8b & 70b; 32k for mixtral 8x7b; 132k for llama 3.1."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="Nucleus sampling: the model only considers the most probable next tokens whose cumulative probability adds up to p."),
    gr.Number(precision=0, value=0, label="Seed", info="A starting point to initiate generation; use 0 for a random seed."),
]
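# These controls appear in the ChatInterface "Additional Inputs" accordion and are passed
# to generate_response positionally, after the user message and the chat history.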
example1 = [
["What's the distance from Tokyo to New York?"],
["What to San Francisco?"],
["Then what to Beijing?"],
["And what to Kyoto?"],
["What from Beijing to New York?"]
]
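# Each example is a one-message conversation starter; the terse follow-ups are meant to be
# sent in sequence so the 8-turn window memory can resolve what "to San Francisco" refers to.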
# # Create the Gradio interface
# interface = gr.ChatInterface(
# fn=generate_response,
# chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
# additional_inputs=additional_inputs,
# examples=example1,
# cache_examples=False,
# )
# # Launch the app
# interface.launch()
def clear_chat():
    # Clear the LangChain window memory so the next conversation starts with an empty history.
    # print("Clear chat history")
    memory.clear()
    return None
with gr.Blocks(fill_width=True, fill_height=True) as demo:
    ci = gr.ChatInterface(
        fn=generate_response,
        chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
        additional_inputs=additional_inputs,
        examples=example1,
        cache_examples=False,
    )
    # Also reset the LangChain memory when the built-in clear button is pressed,
    # otherwise the model would keep "remembering" the previous conversation.
    ci.clear_btn.click(clear_chat)
    # clear_button = gr.Button("Clear chat history and Start a new chat")
    # clear_button.click(clear_chat, inputs=None, outputs=None)
demo.launch()