Tijmen2 commited on
Commit
b5aa0f1
·
verified ·
1 Parent(s): ea879f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -27
app.py CHANGED
@@ -1,44 +1,66 @@
 
1
  import gradio as gr
2
- from llama_cpp import Llama
3
-
4
- llm = Llama(
5
- "AstroMLab/AstroSage-8B-GGUF",
6
- n_ctx=8192,
7
- n_threads=4,
8
- seed=42,
9
- f16_kv=True,
10
- logits_all=False,
11
- use_mmap=True,
12
- use_gpu=True
13
- )
14
 
15
- def respond(message, history, system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
16
  messages = [{"role": "system", "content": system_message}]
17
- for user_msg, assistant_msg in history:
18
- if user_msg:
19
- messages.append({"role": "user", "content": user_msg})
20
- if assistant_msg:
21
- messages.append({"role": "assistant", "content": assistant_msg})
 
 
22
  messages.append({"role": "user", "content": message})
23
-
24
- response = llm.generate_chat(
 
 
25
  messages,
26
  max_tokens=max_tokens,
 
27
  temperature=temperature,
28
- top_p=top_p
29
- )
30
-
31
- return response
 
 
32
 
 
 
 
 
33
  demo = gr.ChatInterface(
34
  respond,
35
  additional_inputs=[
36
  gr.Textbox(value="Assume the role of AstroSage, a helpful chatbot designed to answer user queries about astronomy, astrophysics, and cosmology.", label="System message"),
37
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
38
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
39
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
40
- ]
 
 
 
 
 
 
41
  )
42
 
 
43
  if __name__ == "__main__":
44
- demo.launch()
 
 
import gradio as gr
from huggingface_hub import InferenceClient

# For more information on `huggingface_hub` Inference API support, see:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#
# FIX: `InferenceClient` expects a model repo id (or an inference-endpoint
# URL). The previous value pointed at the .gguf *blob page*
# ("https://huggingface.co/AstroMLab/AstroSage-8B-GGUF/blob/main/AstroSage-8B-Q8_0.gguf"),
# which is an HTML page, not an endpoint — every request would have failed.
client = InferenceClient("AstroMLab/AstroSage-8B-GGUF")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: The latest user message.
        history: Prior ``(user, assistant)`` turn pairs from ``gr.ChatInterface``.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The partial assistant response so far, extended chunk by chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the conversation history; either side of a turn may be empty.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    # FIX: the loop variable was previously named `message`, silently
    # clobbering the function parameter of the same name; renamed to `chunk`.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # FIX: streamed deltas may carry `content=None` (e.g. the final
        # chunk); `response += None` would raise TypeError.
        if token is not None:
            response += token
        yield response
# For information on how to customize the ChatInterface, see the gradio
# docs: https://www.gradio.app/docs/chatinterface
additional_inputs = [
    gr.Textbox(
        value=(
            "Assume the role of AstroSage, a helpful chatbot designed to "
            "answer user queries about astronomy, astrophysics, and cosmology."
        ),
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Streaming chat UI wired to `respond`; the extra widgets above feed its
# system_message / max_tokens / temperature / top_p parameters in order.
demo = gr.ChatInterface(respond, additional_inputs=additional_inputs)


if __name__ == "__main__":
    demo.launch()