phate334 committed on
Commit
863856d
1 Parent(s): 6160b72

[add] device model

Browse files
Files changed (4) hide show
  1. .vscode/settings.json +1 -1
  2. app/devices.py +7 -0
  3. devices.json +15 -15
  4. main.py +17 -6
.vscode/settings.json CHANGED
@@ -3,7 +3,7 @@
3
  "editor.defaultFormatter": "ms-python.black-formatter",
4
  "editor.formatOnSave": true,
5
  "editor.codeActionsOnSave": {
6
- "source.organizeImports": true
7
  },
8
  },
9
  "isort.args":["--profile", "black"],
 
3
  "editor.defaultFormatter": "ms-python.black-formatter",
4
  "editor.formatOnSave": true,
5
  "editor.codeActionsOnSave": {
6
+ "source.organizeImports": "explicit"
7
  },
8
  },
9
  "isort.args":["--profile", "black"],
app/devices.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class Device(BaseModel):
5
+ memory_size: int = Field(alias="memorySize")
6
+ memory_bandwidth: float = Field(alias="memoryBandwidth")
7
+ FLOPS: str
devices.json CHANGED
@@ -1,15 +1,15 @@
1
- [
2
- {"model": "V100", "memory-size": 32, "memory-bandwidth": 900, "FLOPS": "112.224TFLOPS"},
3
- {"model": "T4", "memory-size": 16, "memory-bandwidth": 320, "FLOPS": "64.8TFLOPS"},
4
- {"model": "A2", "memory-size": 16, "memory-bandwidth": 200, "FLOPS": "18.124TFLOPS"},
5
- {"model": "A10", "memory-size": 24, "memory-bandwidth": 600, "FLOPS": "124.96TFLOPS"},
6
- {"model": "A16*4", "memory-size": 64, "memory-bandwidth": 800, "FLOPS": "73.728TFLOPS"},
7
- {"model": "A30", "memory-size": 24, "memory-bandwidth": 933.1, "FLOPS": "165.12TFLOPS"},
8
- {"model": "A40", "memory-size": 48, "memory-bandwidth": 695.8, "FLOPS": "149.68TFLOPS"},
9
- {"model": "A100-40GB", "memory-size": 40, "memory-bandwidth": 1555, "FLOPS": "312.0TFLOPS"},
10
- {"model": "A100-80GB", "memory-size": 80, "memory-bandwidth": 1555, "FLOPS": "312.0TFLOPS"},
11
- {"model": "H100-PCIE", "memory-size": 80, "memory-bandwidth": 2039, "FLOPS": "756.449TFLOPS"},
12
- {"model": "H100-SXM", "memory-size": 80, "memory-bandwidth": 3352, "FLOPS": "989.43TFLOPS"},
13
- {"model": "L40", "memory-size": 48, "memory-bandwidth": 864, "FLOPS": "362.066TFLOPS"},
14
- {"model": "L4", "memory-size": 24, "memory-bandwidth": 300, "FLOPS": "121.0TFLOPS"}
15
- ]
 
1
+ {
2
+ "V100": {"memorySize": 32, "memoryBandwidth": 900, "FLOPS": "112.224TFLOPS"},
3
+ "T4": {"memorySize": 16, "memoryBandwidth": 320, "FLOPS": "64.8TFLOPS"},
4
+ "A2": {"memorySize": 16, "memoryBandwidth": 200, "FLOPS": "18.124TFLOPS"},
5
+ "A10": {"memorySize": 24, "memoryBandwidth": 600, "FLOPS": "124.96TFLOPS"},
6
+ "A16*4": {"memorySize": 64, "memoryBandwidth": 800, "FLOPS": "73.728TFLOPS"},
7
+ "A30": {"memorySize": 24, "memoryBandwidth": 933.1, "FLOPS": "165.12TFLOPS"},
8
+ "A40": {"memorySize": 48, "memoryBandwidth": 695.8, "FLOPS": "149.68TFLOPS"},
9
+ "A100-40GB": {"memorySize": 40, "memoryBandwidth": 1555, "FLOPS": "312.0TFLOPS"},
10
+ "A100-80GB": {"memorySize": 80, "memoryBandwidth": 1555, "FLOPS": "312.0TFLOPS"},
11
+ "H100-PCIE": {"memorySize": 80, "memoryBandwidth": 2039, "FLOPS": "756.449TFLOPS"},
12
+ "H100-SXM": {"memorySize": 80, "memoryBandwidth": 3352, "FLOPS": "989.43TFLOPS"},
13
+ "L40": {"memorySize": 48, "memoryBandwidth": 864, "FLOPS": "362.066TFLOPS"},
14
+ "L4": {"memorySize": 24, "memoryBandwidth": 300, "FLOPS": "121.0TFLOPS"}
15
+ }
main.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path
5
  import gradio as gr
6
  import pandas as pd
7
 
 
8
  from app.models import GgufParser
9
  from app.tables import get_estimate_df, get_model_info_df
10
 
@@ -13,11 +14,23 @@ gguf_parser = Path("gguf-parser-linux-amd64")
13
  gguf_parser_url = f"https://github.com/gpustack/gguf-parser-go/releases/download/{GGUF_PARSER_VERSION}/{gguf_parser}"
14
  DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"
15
 
 
 
 
16
 
17
- def process_url(url, context_length):
 
 
 
 
 
 
18
  try:
 
 
 
19
  res = os.popen(
20
- f"./{gguf_parser} --ctx-size={context_length} -url {url} --json"
21
  ).read()
22
  parser_result = GgufParser.model_validate_json(res)
23
 
@@ -36,17 +49,15 @@ if __name__ == "__main__":
36
  if not gguf_parser.exists():
37
  os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
38
 
39
- with open("devices.json", "r", encoding="utf-8") as f:
40
- device_list = json.load(f)
41
-
42
  with gr.Blocks(title="GGUF Parser") as iface:
43
  url_input = gr.Textbox(placeholder="Enter GGUF URL", value=DEFAULT_URL)
44
  context_length = gr.Number(label="Context Length", value=8192)
 
45
  submit_btn = gr.Button("Send")
46
 
47
  submit_btn.click(
48
  fn=process_url,
49
- inputs=[url_input, context_length],
50
  outputs=[
51
  gr.DataFrame(label="Model Info"),
52
  gr.DataFrame(label="ESTIMATE"),
 
5
  import gradio as gr
6
  import pandas as pd
7
 
8
+ from app.devices import Device
9
  from app.models import GgufParser
10
  from app.tables import get_estimate_df, get_model_info_df
11
 
 
14
  gguf_parser_url = f"https://github.com/gpustack/gguf-parser-go/releases/download/{GGUF_PARSER_VERSION}/{gguf_parser}"
15
  DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"
16
 
17
+ with open("devices.json", "r", encoding="utf-8") as f:
18
+ data = json.load(f)
19
+ devices = {key: Device(**value) for key, value in data.items()}
20
 
21
+ device_options = [
22
+ f"{key} (Memory: {value.memory_size}GB, Bandwidth: {value.memory_bandwidth}GB/s)"
23
+ for key, value in devices.items()
24
+ ]
25
+
26
+
27
+ def process_url(url, context_length, device_selection):
28
  try:
29
+ # Get the key of the selected device
30
+ device_key = device_selection.split(" ")[0]
31
+ selected_device = devices[device_key]
32
  res = os.popen(
33
+ f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
34
  ).read()
35
  parser_result = GgufParser.model_validate_json(res)
36
 
 
49
  if not gguf_parser.exists():
50
  os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
51
 
 
 
 
52
  with gr.Blocks(title="GGUF Parser") as iface:
53
  url_input = gr.Textbox(placeholder="Enter GGUF URL", value=DEFAULT_URL)
54
  context_length = gr.Number(label="Context Length", value=8192)
55
+ device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
56
  submit_btn = gr.Button("Send")
57
 
58
  submit_btn.click(
59
  fn=process_url,
60
+ inputs=[url_input, context_length, device_dropdown],
61
  outputs=[
62
  gr.DataFrame(label="Model Info"),
63
  gr.DataFrame(label="ESTIMATE"),