Update app.py
app.py CHANGED
@@ -19,15 +19,11 @@ class StopOnTokens(StoppingCriteria):
 
 # Define prediction function for the chat interface
 def predict(message, history):
-    #
-
-    stop = StopOnTokens()
-
-    # Concatenate previous messages and the user's input
-    messages = "".join([f"\n### user : {item[0]} \n### bot : {item[1]}" for item in history_transformer_format])
+    # Format the input according to your specified structure
+    formatted_input = f"### user : {message} ### input: ### answer:"
 
     # Tokenize the input
-    model_inputs = tokenizer([
+    model_inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")
 
     # Set up the streamer for partial message output
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -36,7 +32,7 @@ def predict(message, history):
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
-        max_new_tokens=1024
+        max_new_tokens=1024
     )
 
     # Run generation in a separate thread
@@ -51,4 +47,4 @@ def predict(message, history):
         yield partial_message
 
 # Create the chat interface using Gradio
-gr.ChatInterface(fn=predict, title="Monlam LLM
+gr.ChatInterface(fn=predict, title="Monlam LLM", description="").launch(share=True)
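The net effect of this commit: predict no longer concatenates the chat history (the history argument remains only because Gradio's ChatInterface passes it) and instead sends just the current message, wrapped in the ### user : ... ### input: ### answer: template, straight to the model.

For reference, here is a minimal sketch of how the full updated app.py plausibly fits together. The checkpoint name MODEL_ID, the float16 dtype, and the body of StopOnTokens are assumptions not shown in this diff; the predict body and the ChatInterface call follow the added lines above.

from threading import Thread

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    TextIteratorStreamer,
)

# Placeholder: the actual checkpoint is not shown in this diff
MODEL_ID = "your-org/your-model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")


class StopOnTokens(StoppingCriteria):
    # Still defined in the file (see the hunk header above), but after this
    # commit it is no longer instantiated in predict. The stop ids here are
    # a placeholder; the real ones are not visible in the diff.
    def __call__(self, input_ids, scores, **kwargs):
        stop_ids = [tokenizer.eos_token_id]
        for stop_id in stop_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False


# Define prediction function for the chat interface
def predict(message, history):
    # Format the input according to the specified structure
    formatted_input = f"### user : {message} ### input: ### answer:"

    # Tokenize the input
    model_inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")

    # Set up the streamer for partial message output
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024
    )

    # Run generation in a separate thread
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    # Yield the growing reply as tokens arrive
    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        yield partial_message


# Create the chat interface using Gradio
gr.ChatInterface(fn=predict, title="Monlam LLM", description="").launch(share=True)

Running generation on a worker thread is what makes TextIteratorStreamer useful: model.generate blocks until decoding finishes, so it runs in the background while the main thread iterates over the streamer and yields progressively longer replies to Gradio.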