LangChain_Demo / app.py
jchen8000's picture
Update app.py
43f3aa5 verified
raw
history blame
3.67 kB
import os
import random
import gradio as gr
from langchain.chains import LLMChain
from langchain_core.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
# # Initialize the language model and memory
# llm = Groq(api_key="your_groq_api_key")
# memory = ConversationBufferMemory()
# # Define the conversation chain
# conversation = ConversationChain(llm=llm, memory=memory)
# Function to generate responses
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
print( "Model =", model)
# Initialize Groq Langchain chat object and conversation
groq_chat = ChatGroq(
groq_api_key=os.environ.get("GROQ_API_KEY"),
model_name=model
)
# Manages the chat history, ensuring the AI remembers the specified number of history messages, in this case 5.
memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history", return_messages=True)
prompt = ChatPromptTemplate.from_messages(
[
# This is the persistent system prompt, sets the initial context for the AI.
SystemMessage(content='You are a helpful AI assistant.'),
# This placeholder will take care of chat history.
MessagesPlaceholder(variable_name="chat_history"),
# This template is where the user's current input will be injected into the prompt.
HumanMessagePromptTemplate.from_template("{human_input}"),
]
)
# Create a conversation sequence using RunnableSequence
conversation = prompt | groq_chat
# Load chat_history
chat_history = memory.load_memory_variables({})["chat_history"]
# The chatbot's answer is generated by sending the full prompt to the LLM
response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
# Update the memory with the new interaction
memory.save_context({"input": user_input}, {"output": response.content})
return response.content
# Define additional inputs and examples if needed
additional_inputs = [
gr.Dropdown(choices=["llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it"], value="llama-3.1-70b-versatile", label="Model"),
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
gr.Number(precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random")
]
example1 = [
["Who are you?"],
]
# Create the Gradio interface
interface = gr.ChatInterface(
fn=generate_response,
chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
additional_inputs=additional_inputs,
examples=example1,
cache_examples=False,
)
# Launch the app
interface.launch()