ericbanzuzi committed on
Commit
20e8047
·
1 Parent(s): 8100cb2

testing open model

Browse files
Files changed (1) hide show
  1. app.py +19 -61
app.py CHANGED
@@ -1,73 +1,31 @@
1
- import gradio as gr
2
  from llama_cpp import Llama
3
- from huggingface_hub import hf_hub_download
4
-
5
- # Download the file from Hugging Face
6
- model_path = hf_hub_download(repo_id="rcarioniporras/model", filename="unsloth.Q4_K_M.gguf")
7
-
8
- # Load the model using llama_cpp
9
- llm = Llama(model_path=model_path, verbose=False)
10
-
11
-
12
- # """
13
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
14
- # """
15
- # llm = Llama.from_pretrained(
16
- # repo_id="rcarioniporras/model",
17
- # filename="*Q4_K_M.gguf",
18
- # verbose=False
19
- # )
20
-
21
- def respond(
22
- message,
23
- history: list[tuple[str, str]],
24
- system_message,
25
- max_tokens,
26
- temperature,
27
- top_p,
28
- ):
29
- messages = [{"role": "system", "content": system_message}]
30
 
31
- for val in history:
32
- if val[0]:
33
- messages.append({"role": "user", "content": val[0]})
34
- if val[1]:
35
- messages.append({"role": "assistant", "content": val[1]})
36
 
 
 
 
 
 
 
 
37
  messages.append({"role": "user", "content": message})
38
-
39
  response = ""
40
- for message in llm.chat_completion(
41
- messages,
42
- max_tokens=max_tokens,
43
  stream=True,
44
- temperature=temperature,
45
- top_p=top_p,
46
  ):
47
- token = message.choices[0].delta.content
48
-
49
- response += token
50
  yield response
51
 
52
- """
53
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
54
- """
55
- demo = gr.ChatInterface(
56
- respond,
57
- additional_inputs=[
58
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
59
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
60
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
61
- gr.Slider(
62
- minimum=0.1,
63
- maximum=1.0,
64
- value=0.95,
65
- step=0.05,
66
- label="Top-p (nucleus sampling)",
67
- ),
68
- ],
69
- )
70
-
71
 
72
  if __name__ == "__main__":
73
  demo.launch()
 
 
1
  from llama_cpp import Llama
2
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ llm = Llama.from_pretrained(
5
+ repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-q4_k_m",
6
+ filename="unsloth.Q4_K_M.gguf",
7
+ )
 
8
 
9
+ def predict(message, history):
10
+ messages = [{"role": "system", "content": "You are a helpful assistant."}]
11
+ for user_message, bot_message in history:
12
+ if user_message:
13
+ messages.append({"role": "user", "content": user_message})
14
+ if bot_message:
15
+ messages.append({"role": "assistant", "content": bot_message})
16
  messages.append({"role": "user", "content": message})
17
+
18
  response = ""
19
+ for chunk in llm.create_chat_completion(
 
 
20
  stream=True,
21
+ messages=messages,
 
22
  ):
23
+ part = chunk["choices"][0]["delta"].get("content", None)
24
+ if part:
25
+ response += part
26
  yield response
27
 
28
+ demo = gr.ChatInterface(predict)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  if __name__ == "__main__":
31
  demo.launch()