Robzy committed
Commit ce0b3e9 · 1 Parent(s): c7f6c4f

new models

Files changed (1): app.py (+14, -15)
app.py CHANGED
@@ -3,13 +3,18 @@ from llama_cpp import Llama
 
 # Load models
 llm = Llama.from_pretrained(
-    repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-q4_k_m",
+    repo_id="Robzy/lora_model_CodeData_120k",
     filename="unsloth.Q4_K_M.gguf",
 )
 
 llm2 = Llama.from_pretrained(
-    repo_id="Robzy/Llama-3.2-1B-Instruct-Finetuned-CodeData-q4_k_m",
-    filename="unsloth.Q4_K_M.gguf",
+    repo_id="Robzy/lora_model_CodeData_120k",
+    filename="unsloth.Q5_K_M.gguf",
+)
+
+llm3 = Llama.from_pretrained(
+    repo_id="Robzy/lora_model_CodeData_120k",
+    filename="unsloth.Q8_0.gguf",
 )
 
 # Define prediction functions
@@ -62,7 +67,7 @@ def predict3(message, history, model):
     messages.append({"role": "user", "content": message})
 
     response = ""
-    for chunk in llm2.create_chat_completion(
+    for chunk in llm3.create_chat_completion(
         stream=True,
         messages=messages,
     ):
@@ -75,12 +80,10 @@ def predict3(message, history, model):
 
 # Define ChatInterfaces
 io1 = gr.ChatInterface(predict, title="4-bit")
-io2 = gr.ChatInterface(predict2, title="8-bit") # Placeholder
-io3 = gr.ChatInterface(predict3, title="16-bit")
-io4 = gr.ChatInterface(predict2, title="32-bit") # Placeholder
-
+io2 = gr.ChatInterface(predict2, title="5-bit") # Placeholder
+io3 = gr.ChatInterface(predict3, title="8-bit")
 # Dropdown and visibility mapping
-chat_interfaces = {"4-bit": io1, "8-bit": io2, "16-bit": io3, "32-bit": io4}
+chat_interfaces = {"4-bit": io1, "5-bit": io2, "8-bit": io3}
 
 # Define UI
 with gr.Blocks() as demo:
@@ -88,13 +91,9 @@ with gr.Blocks() as demo:
 
     with gr.Tab("4-bit"):
         io1.render()
-    with gr.Tab("8-bit"):
+    with gr.Tab("5-bit"):
         io2.render()
-    with gr.Tab("16-bit"):
+    with gr.Tab("8-bit"):
         io3.render()
-    with gr.Tab("32-bit"):
-        io4.render()
 
-
-
 demo.launch()
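For context, a minimal self-contained sketch of the pattern this diff converges on: one Hub repo (Robzy/lora_model_CodeData_120k) exporting several GGUF quantizations of the same fine-tune, each streamed through a Gradio ChatInterface. The predict body below is an assumption pieced together from the fragments visible in the diff (only predict3's streaming loop is shown), not the author's exact app.py, and it assumes Gradio's default tuple-style chat history.

import gradio as gr
from llama_cpp import Llama

# Download one quantization of the fine-tune from the Hugging Face Hub;
# swapping the filename for unsloth.Q5_K_M.gguf or unsloth.Q8_0.gguf gives
# the other two variants this commit wires up.
llm = Llama.from_pretrained(
    repo_id="Robzy/lora_model_CodeData_120k",
    filename="unsloth.Q4_K_M.gguf",
)

def predict(message, history):
    # Rebuild the llama.cpp chat transcript from Gradio's (user, assistant) pairs.
    messages = []
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    # Stream tokens back; each yield re-renders the partial reply in the UI.
    response = ""
    for chunk in llm.create_chat_completion(stream=True, messages=messages):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            response += delta["content"]
            yield response

gr.ChatInterface(predict, title="4-bit").launch()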
 
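A note on the retitled tabs: GGUF filename suffixes encode the quantization level (Q4_K_M is a 4-bit variant, Q5_K_M 5-bit, Q8_0 8-bit), so the new 4-bit/5-bit/8-bit labels describe the files actually being served, replacing the earlier 8/16/32-bit placeholders.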