黄腾 aopstudio committed on
Commit
ce69533
·
1 Parent(s): 6108c20

add support for PerfXCloud (#1883)

Browse files

### What problem does this PR solve?

#1853 add support for PerfXCloud

### Type of change


- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Zhedong Cen <[email protected]>

conf/llm_factories.json CHANGED
@@ -2442,6 +2442,158 @@
2442
  "model_type": "chat"
2443
  }
2444
  ]
2445
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2446
  ]
2447
  }
 
2442
  "model_type": "chat"
2443
  }
2444
  ]
2445
+ },
2446
+ {
2447
+ "name": "PerfXCloud",
2448
+ "logo": "",
2449
+ "tags": "LLM,TEXT EMBEDDING",
2450
+ "status": "1",
2451
+ "llm": [
2452
+ {
2453
+ "llm_name": "deepseek-v2-chat",
2454
+ "tags": "LLM,CHAT,4k",
2455
+ "max_tokens": 4096,
2456
+ "model_type": "chat"
2457
+ },
2458
+ {
2459
+ "llm_name": "llama3.1:405b",
2460
+ "tags": "LLM,CHAT,128k",
2461
+ "max_tokens": 131072,
2462
+ "model_type": "chat"
2463
+ },
2464
+ {
2465
+ "llm_name": "Qwen2-72B-Instruct",
2466
+ "tags": "LLM,CHAT,128k",
2467
+ "max_tokens": 131072,
2468
+ "model_type": "chat"
2469
+ },
2470
+ {
2471
+ "llm_name": "Qwen2-72B-Instruct-GPTQ-Int4",
2472
+ "tags": "LLM,CHAT,2k",
2473
+ "max_tokens": 2048,
2474
+ "model_type": "chat"
2475
+ },
2476
+ {
2477
+ "llm_name": "Qwen2-72B-Instruct-awq-int4",
2478
+ "tags": "LLM,CHAT,32k",
2479
+ "max_tokens": 32768,
2480
+ "model_type": "chat"
2481
+ },
2482
+ {
2483
+ "llm_name": "Llama3-Chinese_v2",
2484
+ "tags": "LLM,CHAT,8k",
2485
+ "max_tokens": 8192,
2486
+ "model_type": "chat"
2487
+ },
2488
+ {
2489
+ "llm_name": "Yi-1_5-9B-Chat-16K",
2490
+ "tags": "LLM,CHAT,16k",
2491
+ "max_tokens": 16384,
2492
+ "model_type": "chat"
2493
+ },
2494
+ {
2495
+ "llm_name": "Qwen1.5-72B-Chat-GPTQ-Int4",
2496
+ "tags": "LLM,CHAT,2k",
2497
+ "max_tokens": 2048,
2498
+ "model_type": "chat"
2499
+ },
2500
+ {
2501
+ "llm_name": "Meta-Llama-3.1-8B-Instruct",
2502
+ "tags": "LLM,CHAT,4k",
2503
+ "max_tokens": 4096,
2504
+ "model_type": "chat"
2505
+ },
2506
+ {
2507
+ "llm_name": "Qwen2-7B-Instruct",
2508
+ "tags": "LLM,CHAT,32k",
2509
+ "max_tokens": 32768,
2510
+ "model_type": "chat"
2511
+ },
2512
+ {
2513
+ "llm_name": "deepseek-v2-lite-chat",
2514
+ "tags": "LLM,CHAT,2k",
2515
+ "max_tokens": 2048,
2516
+ "model_type": "chat"
2517
+ },
2518
+ {
2519
+ "llm_name": "Qwen2-7B",
2520
+ "tags": "LLM,CHAT,128k",
2521
+ "max_tokens": 131072,
2522
+ "model_type": "chat"
2523
+ },
2524
+ {
2525
+ "llm_name": "chatglm3-6b",
2526
+ "tags": "LLM,CHAT,8k",
2527
+ "max_tokens": 8192,
2528
+ "model_type": "chat"
2529
+ },
2530
+ {
2531
+ "llm_name": "Meta-Llama-3-70B-Instruct-GPTQ-Int4",
2532
+ "tags": "LLM,CHAT,1k",
2533
+ "max_tokens": 1024,
2534
+ "model_type": "chat"
2535
+ },
2536
+ {
2537
+ "llm_name": "Meta-Llama-3-8B-Instruct",
2538
+ "tags": "LLM,CHAT,8k",
2539
+ "max_tokens": 8192,
2540
+ "model_type": "chat"
2541
+ },
2542
+ {
2543
+ "llm_name": "Mistral-7B-Instruct",
2544
+ "tags": "LLM,CHAT,32k",
2545
+ "max_tokens": 32768,
2546
+ "model_type": "chat"
2547
+ },
2548
+ {
2549
+ "llm_name": "MindChat-Qwen-7B-v2",
2550
+ "tags": "LLM,CHAT,2k",
2551
+ "max_tokens": 2048,
2552
+ "model_type": "chat"
2553
+ },
2554
+ {
2555
+ "llm_name": "phi-2",
2556
+ "tags": "LLM,CHAT,2k",
2557
+ "max_tokens": 2048,
2558
+ "model_type": "chat"
2559
+ },
2560
+ {
2561
+ "llm_name": "SOLAR-10_7B-Instruct",
2562
+ "tags": "LLM,CHAT,4k",
2563
+ "max_tokens": 4096,
2564
+ "model_type": "chat"
2565
+ },
2566
+ {
2567
+ "llm_name": "Mixtral-8x7B-Instruct-v0.1-GPTQ",
2568
+ "tags": "LLM,CHAT,32k",
2569
+ "max_tokens": 32768,
2570
+ "model_type": "chat"
2571
+ },
2572
+ {
2573
+ "llm_name": "Qwen1.5-7B",
2574
+ "tags": "LLM,CHAT,32k",
2575
+ "max_tokens": 32768,
2576
+ "model_type": "chat"
2577
+ },
2578
+ {
2579
+ "llm_name": "BAAI/bge-large-en-v1.5",
2580
+ "tags": "TEXT EMBEDDING",
2581
+ "max_tokens": 512,
2582
+ "model_type": "embedding"
2583
+ },
2584
+ {
2585
+ "llm_name": "BAAI/bge-large-zh-v1.5",
2586
+ "tags": "TEXT EMBEDDING",
2587
+ "max_tokens": 1024,
2588
+ "model_type": "embedding"
2589
+ },
2590
+ {
2591
+ "llm_name": "BAAI/bge-m3",
2592
+ "tags": "TEXT EMBEDDING",
2593
+ "max_tokens": 8192,
2594
+ "model_type": "embedding"
2595
+ }
2596
+ ]
2597
+ }
2598
  ]
2599
  }
rag/llm/__init__.py CHANGED
@@ -38,7 +38,8 @@ EmbeddingModel = {
38
  "NVIDIA": NvidiaEmbed,
39
  "LM-Studio": LmStudioEmbed,
40
  "OpenAI-API-Compatible": OpenAI_APIEmbed,
41
- "cohere": CoHereEmbed
 
42
  }
43
 
44
 
@@ -84,7 +85,8 @@ ChatModel = {
84
  "LM-Studio": LmStudioChat,
85
  "OpenAI-API-Compatible": OpenAI_APIChat,
86
  "cohere": CoHereChat,
87
- "LeptonAI": LeptonAIChat
 
88
  }
89
 
90
 
 
38
  "NVIDIA": NvidiaEmbed,
39
  "LM-Studio": LmStudioEmbed,
40
  "OpenAI-API-Compatible": OpenAI_APIEmbed,
41
+ "cohere": CoHereEmbed,
42
+ "PerfXCloud": PerfXCloudEmbed,
43
  }
44
 
45
 
 
85
  "LM-Studio": LmStudioChat,
86
  "OpenAI-API-Compatible": OpenAI_APIChat,
87
  "cohere": CoHereChat,
88
+ "LeptonAI": LeptonAIChat,
89
+ "PerfXCloud": PerfXCloudChat
90
  }
91
 
92
 
rag/llm/chat_model.py CHANGED
@@ -987,4 +987,11 @@ class LeptonAIChat(Base):
987
  def __init__(self, key, model_name, base_url=None):
988
  if not base_url:
989
  base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
990
- super().__init__(key, model_name, base_url)
 
 
 
 
 
 
 
 
987
  def __init__(self, key, model_name, base_url=None):
988
  if not base_url:
989
  base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
990
+ super().__init__(key, model_name, base_url)
991
+
992
+
993
+ class PerfXCloudChat(Base):
994
+ def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
995
+ if not base_url:
996
+ base_url = "https://cloud.perfxlab.cn/v1"
997
+ super().__init__(key, model_name, base_url)
rag/llm/embedding_model.py CHANGED
@@ -553,3 +553,10 @@ class CoHereEmbed(Base):
553
  return np.array([d for d in res.embeddings.float]), int(
554
  res.meta.billed_units.input_tokens
555
  )
 
 
 
 
 
 
 
 
553
  return np.array([d for d in res.embeddings.float]), int(
554
  res.meta.billed_units.input_tokens
555
  )
556
+
557
+
558
+ class PerfXCloudEmbed(OpenAIEmbed):
559
+ def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
560
+ if not base_url:
561
+ base_url = "https://cloud.perfxlab.cn/v1"
562
+ super().__init__(key, model_name, base_url)
web/src/assets/svg/llm/perfx-cloud.svg ADDED
web/src/pages/user-setting/setting-model/constant.ts CHANGED
@@ -25,6 +25,7 @@ export const IconMap = {
25
  'OpenAI-API-Compatible': 'openai-api',
26
  cohere: 'cohere',
27
  Lepton: 'lepton',
 
28
  };
29
 
30
  export const BedrockRegionList = [
 
25
  'OpenAI-API-Compatible': 'openai-api',
26
  cohere: 'cohere',
27
  Lepton: 'lepton',
28
+ PerfXCloud: 'perfx-cloud'
29
  };
30
 
31
  export const BedrockRegionList = [