import os
import gradio as gr
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain.memory import ConversationBufferWindowMemory
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
# Initialize the Groq LangChain chat object; the model is switched per request.
groq_chat = ChatGroq(
    groq_api_key=os.environ.get("GROQ_API_KEY"),
    model_name="llama-3.1-70b-versatile",  # placeholder; overridden in generate_response
)
# Initialize the Google LangChain chat object; the model is switched per request.
google_chat = ChatGoogleGenerativeAI(
    api_key=os.environ.get("GOOGLE_API_KEY"),
    model="gemini-1.5-flash",  # placeholder; overridden in generate_response
)
# Initialize memory to manage the chat history, ensuring the AI remembers
# the specified number of exchanges, in this case 8.
memory = ConversationBufferWindowMemory(k=8, memory_key="chat_history", return_messages=True)
def generate_response(user_input, history, model, temperature, max_tokens, top_p, seed):
    # Gradio passes `history`, but this app tracks context in its own memory object.
    print("Model =", model)
    if model.startswith("gemini"):
        chat = google_chat
        chat.model = model
    else:
        chat = groq_chat
        chat.model_name = model
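    # Apply the user-selected sampling parameters to the chat object. A minimal
    # sketch, assuming these attribute names on the langchain-groq and
    # langchain-google-genai models; adjust for your installed versions.
    chat.temperature = temperature
    if model.startswith("gemini"):
        chat.top_p = top_p
        chat.max_output_tokens = max_tokens  # Gemini uses max_output_tokens
    else:
        chat.max_tokens = max_tokens
        model_kwargs = {"top_p": top_p}
        if seed:  # the Seed control treats 0 as "random", i.e. omit the seed
            model_kwargs["seed"] = seed
        chat.model_kwargs = model_kwargs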
    prompt = ChatPromptTemplate.from_messages(
        [
            # The persistent system prompt that sets the initial context for the AI.
            SystemMessage(content='You are a helpful AI assistant.'),
            # This placeholder is filled with the chat history.
            MessagesPlaceholder(variable_name="chat_history"),
            # The user's current input is injected into the prompt here.
            HumanMessagePromptTemplate.from_template("{human_input}"),
        ]
    )
    # Create a conversation sequence using RunnableSequence
    conversation = prompt | chat
    # Load chat_history
    chat_history = memory.load_memory_variables({})["chat_history"]
    # The chatbot's answer is generated by sending the full prompt to the LLM
    response = conversation.invoke({"human_input": user_input, "chat_history": chat_history})
    # Update the memory with the new interaction
    memory.save_context({"input": user_input}, {"output": response.content})
    return response.content
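# Hypothetical direct call that bypasses the UI, handy for a quick smoke test:
#   print(generate_response("Hello!", [], "llama-3.1-8b-instant", 0.5, 1024, 0.5, 0))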
# Define the additional model controls shown under the chat box
additional_inputs = [
    gr.Dropdown(
        choices=[
            "llama-3.1-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192",
            "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it", "gemma-7b-it",
            "gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b", "gemini-2.0-flash-exp",
        ],
        value="llama-3.1-70b-versatile",
        label="Model",
    ),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature",
              info="Controls diversity of the generated text: lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens",
              info="The maximum number of tokens the model can generate in a single response.<br>Maximums: 8k for gemma-7b-it, gemma2-9b-it, llama3 8b & 70b; 32k for mixtral 8x7b; 131k for llama 3.1."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P",
              info="Nucleus sampling: the model considers only the most probable next tokens whose cumulative probability reaches p."),
    gr.Number(precision=0, value=0, label="Seed",
              info="A fixed starting point for generation; use 0 for a random seed."),
]
# Example prompts; the elliptical follow-ups exercise the conversation memory.
example1 = [
    ["What's the distance from Tokyo to New York?"],
    ["And to San Francisco?"],
    ["Then to Beijing?"],
    ["And to Kyoto?"],
    ["And from Beijing to New York?"]
]
# Create the Gradio interface
interface = gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True,
                       likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    examples=example1,
    cache_examples=False,
)
# Launch the app
interface.launch()
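# On Hugging Face Spaces, launch() picks up host and port automatically; for a
# local run you could pass options such as interface.launch(server_port=7860).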