PabloVD committed on
Commit b0ae0d1 · 1 Parent(s): 530e2cc

Add app and requirements

Files changed (2)
  1. app.py +74 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,74 @@
+ # Following https://python.langchain.com/docs/tutorials/chatbot/
+ # Missing: trimming, streaming with memory, multiple conversation threads (see the sketches after this diff)
+
+ from langchain_mistralai import ChatMistralAI
+ from langchain_core.rate_limiters import InMemoryRateLimiter
+ from langgraph.checkpoint.memory import MemorySaver
+ from langgraph.graph import START, MessagesState, StateGraph
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_core.messages import HumanMessage, AIMessage
+ import gradio as gr
+
+ # Prompt template
+ prompt = ChatPromptTemplate.from_messages(
+     [
+         (
+             "system",
+             "You are an AI assistant. Answer all questions to the best of your ability.",
+         ),
+         MessagesPlaceholder(variable_name="messages"),
+     ]
+ )
+
+ # Rate limiter
+ rate_limiter = InMemoryRateLimiter(
+     requests_per_second=0.1,    # <-- MistralAI free tier: at most one request every 10 seconds
+     check_every_n_seconds=0.01, # Wake up every 10 ms to check whether a request is allowed
+     max_bucket_size=10,         # Controls the maximum burst size
+ )
+
+ model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+
+ # Define a new graph
+ workflow = StateGraph(state_schema=MessagesState)
+
+ # Define the function that calls the model
+ def call_model(state: MessagesState):
+     chain = prompt | model
+     response = chain.invoke(state)
+     return {"messages": response}
+
+ # Define the (single) node in the graph
+ workflow.add_node("model", call_model)
+ workflow.add_edge(START, "model")
+
+ # Add memory
+ memory = MemorySaver()
+ app = workflow.compile(checkpointer=memory)
+
+ # Config with thread
+ config = {"configurable": {"thread_id": "abc345"}}
+
+
+ def handle_prompt(query, history):
+     input_messages = [HumanMessage(query)]
+     try:
+         # Stream output
+         # out = ""
+         # for chunk, metadata in app.stream(
+         #     {"messages": input_messages},
+         #     config,
+         #     stream_mode="messages",
+         # ):
+         #     if isinstance(chunk, AIMessage):  # Filter to just model responses
+         #         out += chunk.content
+         #         yield out
+         output = app.invoke({"messages": input_messages}, config)
+         return output["messages"][-1].content
+     except Exception as err:
+         raise gr.Error("Requests rate limit exceeded") from err
+
+ description = "A MistralAI-powered chatbot, built with LangChain and deployed with Gradio."
+
+ demo = gr.ChatInterface(
+     handle_prompt,
+     type="messages",
+     title="Medieval ChatBot",
+     theme=gr.themes.Citrus(),
+     description=description,
+ )
+
+ demo.launch()
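
Note on the "trimming" item in the header comment: a minimal sketch of how history trimming could be added with trim_messages from langchain-core, following the same LangChain chatbot tutorial. The token budget and parameters are illustrative assumptions, not part of this commit; it reuses model, prompt, and MessagesState from app.py above.

    from langchain_core.messages import trim_messages

    # Illustrative budget: keep roughly the last 1000 tokens of history,
    # counted with the chat model's own tokenizer.
    trimmer = trim_messages(
        max_tokens=1000,
        strategy="last",      # keep the most recent messages
        token_counter=model,  # the ChatMistralAI instance defined above
        include_system=True,  # always keep the system prompt
        start_on="human",     # never start history on a dangling AI turn
    )

    # Drop-in replacement for call_model that trims before invoking
    def call_model(state: MessagesState):
        trimmed = trimmer.invoke(state["messages"])
        response = (prompt | model).invoke({"messages": trimmed})
        return {"messages": response}

Because trim_messages is called here without a message list, it returns a runnable trimmer that can be invoked inside the graph node.
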
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ langchain-core
+ langgraph>0.2.27
+ langchain_mistralai
+ gradio
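
On the "multiple conversation threads" item: a minimal sketch of per-session threads, assuming Gradio injects a gr.Request when the handler declares a parameter annotated with that type, and using its session_hash as the checkpointer thread id. This would replace the single hard-coded thread_id in app.py and is not part of the commit.

    # Sketch: one checkpointer thread per browser session, so concurrent
    # users don't share chat history. Assumption: request.session_hash is
    # a stable per-session identifier supplied by Gradio.
    def handle_prompt(query, history, request: gr.Request):
        config = {"configurable": {"thread_id": request.session_hash}}
        input_messages = [HumanMessage(query)]
        try:
            output = app.invoke({"messages": input_messages}, config)
            return output["messages"][-1].content
        except Exception as err:
            raise gr.Error("Requests rate limit exceeded") from err
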