csabakecskemeti committed
Commit f818900 · verified · 1 Parent(s): fdb9486

Update app.py

Files changed (1)
  1. app.py +52 -98
app.py CHANGED
@@ -1,99 +1,53 @@
 
  import gradio as gr
- import requests, json
-
- public_ip = '71.202.66.108'
-
- model = 'llama3.1:latest' # You can replace the model name if needed
- context = []
-
- ollama_serve = f"http://{public_ip}:11434/api/generate"
-
- # Call Ollama API
- def generate(prompt, context, top_k, top_p, temp):
-     r = requests.post(ollama_serve,
-                       json={
-                           'model': model,
-                           'prompt': prompt,
-                           'context': context,
-                           'options': {
-                               'top_k': top_k,
-                               'temperature': top_p,
-                               'top_p': temp
-                           }
-                       },
-                       stream=True)
-     r.raise_for_status()
-
-     response = ""
-
-     for line in r.iter_lines():
-         body = json.loads(line)
-         response_part = body.get('response', '')
-
-         if 'error' in body:
-             yield f"Error: {body['error']}"
-             return
-
-         # Append token to the growing response and yield the entire response so far
-         if response_part:
-             response += response_part
-             yield response # Yield the growing response incrementally
-
-         if body.get('done', False):
-             context = body.get('context', [])
-             return # End the generator once done
-
- def chat(input, chat_history, top_k, top_p, temp):
-     chat_history = chat_history or []
-     global context
-
-     # Initialize the user input as part of the chat history
-     chat_history.append((input, "")) # Add user input first
-     response = "" # Initialize empty response
-
-     # Stream each part of the response as it's received
-     response_stream = generate(input, context, top_k, top_p, temp)
-
-     for response_part in response_stream:
-         response = response_part # Keep updating with the new part of the response
-         # Update the latest assistant response (the second part of the tuple)
-         chat_history[-1] = (input, response)
-         yield chat_history, chat_history # Yield the updated chat history
-
-
- ######################### Gradio Code ##########################
- # background-image: url('https://cdn.shoplightspeed.com/shops/631940/files/45845092/800x800x3/apple-apple-macpro-trashcan-12-core-27ghz-64gb-1tb.jpg');
- block = gr.Blocks(css="""
- .chatbox {
-     background-image: url('https://cdn.shoplightspeed.com/shops/631940/files/45845092/800x800x3/apple-apple-macpro-trashcan-12-core-27ghz-64gb-1tb.jpg');
-     background-size: contain; /* Ensure the image fits the height */
-     background-repeat: no-repeat;
-     background-position: center;
-     height: 100%; /* Make the chatbox fill the available height */
- }
- """)
-
-
-
- with block:
-
-     gr.Markdown("""<h1><center> Trashcan AI </center></h1>""")
-     gr.Markdown("""<h3><center> LLama3.1 hosted on a 2013 "Trashcan" Mac Pro with ollama </center></h3>""")
-
-     # Add a custom class 'chatbox' to apply the background image
-     chatbot = gr.Chatbot(elem_classes="chatbox")
-     message = gr.Textbox(placeholder="Type here")
-
-     state = gr.State()
-     with gr.Row():
-         top_k = gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)")
-         top_p = gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)")
-         temp = gr.Slider(0.0, 2.0, label="temperature", value=0.8, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)")
-
-     submit = gr.Button("SEND")
-
-     # Use .click() to trigger the response streaming
-     submit.click(chat, inputs=[message, state, top_k, top_p, temp], outputs=[chatbot, state])
-
- if __name__ == "__main__":
-     block.launch()
 
+ import os
  import gradio as gr
+ import ollama
+ public_ip = os.environ['PUBLIC_IP']
+ port = os.environ['PORT']
+
+ model = 'llama3.1'
+ from ollama import Client
+ client = Client(host=f'http://{public_ip}:{port}')
+
+ def format_history(msg: str, history: list[list[str, str]], system_prompt: str):
+     chat_history = [{"role": "system", "content":system_prompt}]
+     for query, response in history:
+         chat_history.append({"role": "user", "content": query})
+         chat_history.append({"role": "assistant", "content": response})
+     chat_history.append({"role": "user", "content": msg})
+     return chat_history
+
+ def generate_response(msg: str, history: list[list[str, str]], system_prompt: str, top_k: int, top_p: float, temperature: float):
+     chat_history = format_history(msg, history, system_prompt)
+     response = client.chat(model=model,
+                            stream=True,
+                            messages=chat_history,
+                            options={'top_k':top_k, 'top_p':top_p, 'temperature':temperature})
+     message = ""
+     for partial_resp in response:
+         token = partial_resp["message"]["content"]
+         message += token
+         yield message
+
+
+ chatbot = gr.ChatInterface(
+     generate_response,
+     chatbot=gr.Chatbot(
+         avatar_images=["user.png", "chatbot.png"],
+         height="64vh"
+     ),
+     additional_inputs=[
+         gr.Textbox("You are a helpful assistant and always try to answer user queries to the best of your ability.", label="System Prompt"),
+         gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
+         gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)"),
+         gr.Slider(0.0, 2.0, label="temperature", value=0.4, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
+     ],
+     title="Trashcan AI",
+     description="LLama3.1 hosted on a 2013 \"Trashcan\" Mac Pro with ollama",
+     theme="finlaymacklon/smooth_slate",
+     submit_btn="Send",
+     retry_btn="🔄 Regenerate Response",
+     undo_btn="↩ Delete Previous",
+     clear_btn="🗑️ Clear Chat"
+ )
+
+ chatbot.queue().launch()
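
Below is a minimal, self-contained sketch of the streaming chat call the new app.py relies on, usable as a quick smoke test against the remote Ollama server before launching the Space. It assumes the ollama Python package is installed, that the PUBLIC_IP and PORT environment variables (the names read by the new app.py) point at a reachable Ollama server, and that the llama3.1 model has been pulled there; the prompt and option values are placeholders, not part of the commit.

# Quick smoke test for the streaming chat API used by generate_response() above.
# Assumptions: the `ollama` Python client is installed, PUBLIC_IP/PORT point at a
# reachable Ollama server, and the llama3.1 model has already been pulled there.
import os
from ollama import Client

client = Client(host=f"http://{os.environ['PUBLIC_IP']}:{os.environ['PORT']}")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one short sentence."},  # placeholder prompt
]

# With stream=True the client yields partial responses; each chunk carries the next
# piece of text under chunk["message"]["content"], which is exactly what
# generate_response() accumulates and yields back to Gradio.
for chunk in client.chat(model="llama3.1", stream=True, messages=messages,
                         options={"top_k": 40, "top_p": 0.9, "temperature": 0.4}):
    print(chunk["message"]["content"], end="", flush=True)
print()

If this prints a streamed reply, the Gradio app should stream as well, since generate_response() consumes the same chunks.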