lightmate committed
Commit 1c3f8cd · verified · parent: 6451b24

Update app.py

Files changed (1):
  1. app.py +32 -49
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 from pathlib import Path
 import requests
-import shutil
 import torch
 from threading import Event, Thread
 from transformers import AutoConfig, AutoTokenizer
@@ -17,16 +16,18 @@ from llm_config import SUPPORTED_LLM_MODELS
 # Initialize model language options
 model_languages = list(SUPPORTED_LLM_MODELS)
 
-# Gradio Interface inside Blocks
+# Define Gradio interface within a Blocks context
 with gr.Blocks() as iface:
+    # Dropdown for model language selection
     model_language = gr.Dropdown(
         choices=model_languages,
         value=model_languages[0],
         label="Model Language"
     )
 
+    # Dropdown for model ID, dynamically populated
     model_id = gr.Dropdown(
-        choices=[],  # will be dynamically populated
+        choices=[],  # will be populated dynamically
         label="Model",
         value=None
     )
@@ -34,34 +35,34 @@ with gr.Blocks() as iface:
     # Function to update model_id dropdown choices based on model_language
     def update_model_id(model_language_value):
         model_ids = list(SUPPORTED_LLM_MODELS[model_language_value])
-        return gr.update(value=model_ids[0], choices=model_ids)
+        return gr.Dropdown.update(value=model_ids[0], choices=model_ids)
 
+    # Update model_id choices when model_language changes
     model_language.change(update_model_id, inputs=model_language, outputs=model_id)
 
-    # Gradio checkbox for preparing INT4 model
+    # Checkbox for INT4 model preparation
     prepare_int4_model = gr.Checkbox(
         value=True,
         label="Prepare INT4 Model"
     )
 
-    # Gradio checkbox for enabling AWQ (depends on INT4 checkbox)
+    # Checkbox for enabling AWQ (shown conditionally)
     enable_awq = gr.Checkbox(
         value=False,
         label="Enable AWQ",
-        visible=False
+        visible=False  # visibility can be controlled in the UI logic
     )
 
-    # Gradio dropdown for device selection
+    # Dropdown for device selection
     device = gr.Dropdown(
         choices=["CPU", "GPU"],
         value="CPU",
         label="Device"
     )
 
-    # Model directory and setup based on selections
+    # Function to retrieve model configuration and path
     def get_model_path(model_language_value, model_id_value):
         model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
-        pt_model_id = model_configuration["model_id"]
         pt_model_name = model_id_value.split("-")[0]
         int4_model_dir = Path(model_id_value) / "INT4_compressed_weights"
         return model_configuration, int4_model_dir, pt_model_name
@@ -69,54 +70,44 @@
     # Function to download the model if not already present
     def download_model_if_needed(model_language_value, model_id_value):
         model_configuration, int4_model_dir, pt_model_name = get_model_path(model_language_value, model_id_value)
-
         int4_weights = int4_model_dir / "openvino_model.bin"
-
         if not int4_weights.exists():
             print(f"Downloading model {model_id_value}...")
-            # Add your download logic here (e.g., from a URL)
-            # Example:
-            # r = requests.get(model_configuration["model_url"])
-            # with open(int4_weights, "wb") as f:
-            #     f.write(r.content)
-
+            # Download logic (e.g., requests.get(model_configuration["model_url"])) can go here
         return int4_model_dir
 
-    # Load the model
+    # Load the model based on selected options
    def load_model(model_language_value, model_id_value):
         int4_model_dir = download_model_if_needed(model_language_value, model_id_value)
-
-        # Load the OpenVINO model
-        ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
+        ov_config = {
+            hints.performance_mode(): hints.PerformanceMode.LATENCY,
+            streams.num(): "1",
+            props.cache_dir(): ""
+        }
         core = ov.Core()
-
-        model_dir = int4_model_dir
-        model_configuration = SUPPORTED_LLM_MODELS[model_language_value][model_id_value]
-
-        tok = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+        tok = AutoTokenizer.from_pretrained(int4_model_dir, trust_remote_code=True)
         ov_model = OVModelForCausalLM.from_pretrained(
-            model_dir,
-            device=device.value,  # Use Gradio dropdown value for device
+            int4_model_dir,
+            device=device.value,
             ov_config=ov_config,
-            config=AutoConfig.from_pretrained(model_dir, trust_remote_code=True),
+            config=AutoConfig.from_pretrained(int4_model_dir, trust_remote_code=True),
             trust_remote_code=True
         )
-
-        return tok, ov_model, model_configuration
+        return tok, ov_model
 
-    # Gradio UI for temperature and other model parameters
+    # Gradio sliders for model generation parameters
     temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
     top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
     top_k = gr.Slider(minimum=0, maximum=50, value=50, label="Top K")
     repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, label="Repetition Penalty")
 
-    # Conversation history input/output
-    history = gr.State([])  # store the conversation history
+    # Conversation history state
+    history = gr.State([])
 
-    # Gradio function for generating responses
+    # Function to generate responses based on model and input
     def generate_response(history, temperature, top_p, top_k, repetition_penalty, model_language_value, model_id_value):
-        tok, ov_model, model_configuration = load_model(model_language_value, model_id_value)
-
+        tok, ov_model = load_model(model_language_value, model_id_value)
+
         def convert_history_to_token(history):
             input_tokens = tok(" ".join([msg[0] for msg in history]), return_tensors="pt").input_ids
             return input_tokens
@@ -148,23 +139,15 @@
             history[-1][1] = partial_text
             yield history
 
-    # Interface setup
+    # Set up the interface with inputs and outputs
     iface = gr.Interface(
         fn=generate_response,
-        inputs=[
-            history,
-            temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
-            model_language,
-            model_id
-        ],
+        inputs=[history, temperature, top_p, top_k, repetition_penalty, model_language, model_id],
         outputs=[gr.Textbox(label="Conversation History"), history],
         live=True,
         title="OpenVINO Chatbot"
     )
 
-# Launch Gradio app
+# Launch the Gradio app
 if __name__ == "__main__":
     iface.launch(debug=True, share=True, server_name="0.0.0.0", server_port=7860)
 
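Note on the `gr.update` → `gr.Dropdown.update` change in this commit: `gr.Dropdown.update` is Gradio 3.x API and was removed in Gradio 4, where the generic `gr.update(...)` (or returning a fresh component instance) is the supported way to patch a component's properties. If the Space is ever bumped to Gradio 4+, a version-tolerant callback would look like this sketch, using the same `SUPPORTED_LLM_MODELS` mapping the file already imports:

```python
import gradio as gr
from llm_config import SUPPORTED_LLM_MODELS

def update_model_id(model_language_value):
    model_ids = list(SUPPORTED_LLM_MODELS[model_language_value])
    # gr.update(...) works on Gradio 3.x and 4.x alike; gr.Dropdown.update(...) is 3.x-only.
    return gr.update(value=model_ids[0], choices=model_ids)
```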
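The `enable_awq` checkbox is created with `visible=False`, and its new comment defers visibility to "the UI logic", but no toggle is wired up anywhere in the diff. If the intent is to offer AWQ only when INT4 preparation is selected, a minimal sketch of that wiring (hypothetical, placed inside the same `gr.Blocks()` context) could be:

```python
def toggle_awq(prepare_int4):
    # Reveal the AWQ option only while "Prepare INT4 Model" is checked.
    return gr.update(visible=prepare_int4, value=False)

prepare_int4_model.change(toggle_awq, inputs=prepare_int4_model, outputs=enable_awq)
```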
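The download branch still only prints a message; the placeholder comment gestures at `requests.get`. A hedged sketch of what that branch could do, assuming the configuration dict exposes a direct URL under a `model_url` key (that key is illustrative, not confirmed by `llm_config`):

```python
import requests
from pathlib import Path

def download_int4_weights(model_configuration: dict, int4_weights: Path) -> None:
    # Stream to disk so large weight files are never held in memory at once.
    int4_weights.parent.mkdir(parents=True, exist_ok=True)
    with requests.get(model_configuration["model_url"], stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(int4_weights, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
```

In practice, OpenVINO INT4 weights are often produced locally with optimum-intel's export tooling rather than fetched as a single binary, so this helper stands in for whichever approach the Space ends up using.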
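`load_model` runs inside `generate_response`, i.e., on every chat turn, so the tokenizer is re-read and the OpenVINO model recompiled per message (the `core = ov.Core()` it creates is also never used). Memoizing by selection keeps the behavior while paying the load cost once; note too that `device=device.value` reads the dropdown's construction-time default, not the user's live choice, which should instead be passed through the event. A sketch under those assumptions:

```python
from functools import lru_cache

@lru_cache(maxsize=2)  # keep at most two compiled models resident
def load_model_cached(model_language_value: str, model_id_value: str, device_value: str):
    int4_model_dir = download_model_if_needed(model_language_value, model_id_value)
    ov_config = {
        hints.performance_mode(): hints.PerformanceMode.LATENCY,
        streams.num(): "1",
        props.cache_dir(): "",
    }
    tok = AutoTokenizer.from_pretrained(int4_model_dir, trust_remote_code=True)
    ov_model = OVModelForCausalLM.from_pretrained(
        int4_model_dir,
        device=device_value,  # runtime value passed in by the caller, not device.value
        ov_config=ov_config,
        config=AutoConfig.from_pretrained(int4_model_dir, trust_remote_code=True),
        trust_remote_code=True,
    )
    return tok, ov_model
```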
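`convert_history_to_token` joins only the user half of each history pair (`msg[0]`), so earlier assistant replies never reach the model, and a bare `" ".join` ignores the model's prompt format. For chat-tuned checkpoints, transformers' `apply_chat_template` is the usual fix; a sketch assuming `history` holds `[user, assistant]` pairs and the tokenizer ships a chat template:

```python
def convert_history_to_token(history):
    # Rebuild the full alternating conversation, assistant turns included.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # the newest pair has no answer yet
            messages.append({"role": "assistant", "content": assistant_msg})
    return tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
```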
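The body of the generation loop is elided from this diff (only `history[-1][1] = partial_text` and `yield history` survive), but the `Event`/`Thread` imports point at token streaming. A typical shape for that loop with transformers' `TextIteratorStreamer`, offered as a reconstruction rather than the file's actual code:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_answer(tok, ov_model, input_tokens, temperature, top_p, top_k, repetition_penalty):
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_tokens,
        max_new_tokens=256,
        do_sample=temperature > 0.0,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
    # generate() blocks, so it runs on a worker thread while the streamer is drained here.
    Thread(target=ov_model.generate, kwargs=generate_kwargs).start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
```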
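One structural wrinkle the commit leaves in place: `iface = gr.Interface(...)` is built inside `with gr.Blocks() as iface:`, so the Interface rebinds the name of the Blocks it lives in, and components that already render in the Blocks layout are passed again as Interface `inputs`. Staying Blocks-native avoids both problems; a sketch with hypothetical `chatbot`/`msg` components, reusing the sliders and dropdowns defined above:

```python
with gr.Blocks() as demo:
    # ... model_language, model_id, checkboxes, and sliders as defined above ...
    chatbot = gr.Chatbot(label="Conversation History")
    msg = gr.Textbox(label="Your message")
    history = gr.State([])

    def user_turn(user_message, history):
        # Append the new user message with an empty slot for the reply.
        return "", history + [[user_message, None]]

    # generate_response yields the updated history, which Chatbot renders directly.
    msg.submit(user_turn, [msg, history], [msg, history]).then(
        generate_response,
        [history, temperature, top_p, top_k, repetition_penalty, model_language, model_id],
        [chatbot],
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=True, server_name="0.0.0.0", server_port=7860)
```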