zerostratos committed on
Commit
3462e3a
·
verified ·
1 Parent(s): 40db491

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -47
app.py CHANGED
@@ -1,51 +1,62 @@
1
- #pip install huggingface_hub
2
-
3
- #export HF_TOKEN="<>"
4
-
5
-
6
-
7
  from huggingface_hub import InferenceClient
8
-
9
- import json
10
-
11
-
12
-
13
- repo_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
14
-
15
-
16
-
17
- llm_client = InferenceClient(
18
-
19
- model=repo_id,
20
-
21
- timeout=120,
22
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  )
24
 
25
 
26
-
27
- def call_llm(inference_client: InferenceClient, prompt: str):
28
-
29
- response = inference_client.post(
30
-
31
- json={
32
-
33
- "inputs": prompt,
34
-
35
- "parameters": {"max_new_tokens": 200},
36
-
37
- "task": "text-generation",
38
-
39
- },
40
-
41
- )
42
-
43
- return json.loads(response.decode())[0]["generated_text"]
44
-
45
-
46
-
47
-
48
-
49
- response=call_llm(llm_client, "write me a crazy joke")
50
-
51
- print (response)
 
1
# Gradio renders the chat UI; InferenceClient talks to the HF Inference API.
import gradio as gr
from huggingface_hub import InferenceClient
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Client bound to the model repo this Space serves; created once at import time.
client = InferenceClient("zerostratos/test")
7
+
8
+ def respond(
9
+ message,
10
+ history: list[tuple[str, str]],
11
+ system_message,
12
+ max_tokens,
13
+ temperature,
14
+ top_p,
15
+ ):
16
+ messages = [{"role": "system", "content": system_message}]
17
+
18
+ for val in history:
19
+ if val[0]:
20
+ messages.append({"role": "user", "content": val[0]})
21
+ if val[1]:
22
+ messages.append({"role": "assistant", "content": val[1]})
23
+
24
+ messages.append({"role": "user", "content": message})
25
+
26
+ response = ""
27
+
28
+ for message in client.chat_completion(
29
+ messages,
30
+ max_tokens=max_tokens,
31
+ stream=True,
32
+ temperature=temperature,
33
+ top_p=top_p,
34
+ ):
35
+ token = message.choices[0].delta.content
36
+
37
+ response += token
38
+ yield response
39
+
40
+
41
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls rendered under the chat box; their values are passed to
# `respond` after (message, history), in this order.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_controls)


# Launch the UI only when executed directly as a script.
if __name__ == "__main__":
    demo.launch()