import os
import gradio as gr
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
# Initialize the Groq LangChain chat object; the model is switched per request.
groq_chat = ChatGroq(
    groq_api_key=os.environ.get("GROQ_API_KEY"),
    model_name="llama-3.1-70b-versatile",  # placeholder; overridden in generate_response
)
# Initialize the Google LangChain chat object; the model is switched per request.
google_chat = ChatGoogleGenerativeAI(
    api_key=os.environ.get("GOOGLE_API_KEY"),
    model="gemini-1.5-flash",  # placeholder; overridden in generate_response
)
# Initialize memory to manage the chat history, ensuring the AI remembers
# the specified number of exchanges, in this case 8.
memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_messages=True)
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
    # Gradio passes `history`, but this app tracks context in its own memory object.
    print("Model =", model)
    if model.startswith("gemini"):
        chat = google_chat
        chat.model = model
    else:
        chat = groq_chat
        chat.model_name = model
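    # Apply the user-selected sampling parameters to the chat object. A minimal
    # sketch, assuming these attribute names on the langchain-groq and
    # langchain-google-genai models; adjust for your installed versions.
    chat.temperature = temperature
    if model.startswith("gemini"):
        chat.top_p = top_p
        chat.max_output_tokens = max_tokens  # Gemini uses max_output_tokens
    else:
        chat.max_tokens = max_tokens
        model_kwargs = {"top_p": top_p}
        if seed:  # the Seed control treats 0 as "random", i.e. omit the seed
            model_kwargs["seed"] = seed
        chat.model_kwargs = model_kwargs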
    prompt = ChatPromptTemplate.from_messages(
        [
            # The persistent system prompt that sets the initial context for the AI.
            SystemMessage(content='You are a helpful AI assistant.'),
            # This placeholder is filled with the chat history.
            MessagesPlaceholder(variable_name="chat_history"),
            # The user's current input is injected into the prompt here.
            HumanMessagePromptTemplate.from_template("{human_input}"),
        ]
    )
    # Create a conversation sequence using RunnableSequence
    conversation = prompt | chat
    # Load chat_history
    chat_history = memory.load_memory_variables({})["chat_history"]
    # The chatbot's answer is generated by sending the full prompt to the LLM
    response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
    # Update the memory with the new interaction
    memory.save_context({"input": user_input}, {"output": response.content})
    return response.content
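# Hypothetical direct call that bypasses the UI, handy for a quick smoke test:
#   print(generate_response("Hello!", [], "llama-3.1-8b-instant", 0.5, 1024, 0.5, 0))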
# Define the additional model controls shown under the chat box
additional_inputs = [
    gr.Dropdown(
        choices=[
            "llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192",
            "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it",
            "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp",
        ],
        value="llama-3.1-70b-versatile",
        label="Model",
    ),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature",
              info="Controls diversity of the generated text: lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens",
              info="The maximum number of tokens the model can generate in a single response.<br>Maximums: 8k for gemma-7b-it, gemma2-9b-it, llama3 8b & 70b; 32k for mixtral 8x7b; 131k for llama 3.1."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P",
              info="Nucleus sampling: the model considers only the most probable next tokens whose cumulative probability reaches p."),
    gr.Number(precision=0, value=0, label="Seed",
              info="A fixed starting point for generation; use 0 for a random seed."),
]
# Example prompts; the elliptical follow-ups exercise the conversation memory.
example1 = [
    ["What's the distance from Tokyo to New York?"],
    ["And to San Francisco?"],
    ["Then to Beijing?"],
    ["And to Kyoto?"],
    ["And from Beijing to New York?"]
]
# Create the Gradio interface
interface = gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True,
                       likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    examples=example1,
    cache_examples=False,
)
# Launch the app
interface.launch()
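# On Hugging Face Spaces, launch() picks up host and port automatically; for a
# local run you could pass options such as interface.launch(server_port=7860).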