add support for PerfXCloud (#1883)
Browse files### What problem does this PR solve?
#1853 add support for PerfXCloud
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Co-authored-by: Zhedong Cen <[email protected]>
conf/llm_factories.json
CHANGED
@@ -2442,6 +2442,158 @@
|
|
2442 |
"model_type": "chat"
|
2443 |
}
|
2444 |
]
|
2445 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2446 |
]
|
2447 |
}
|
|
|
2442 |
"model_type": "chat"
|
2443 |
}
|
2444 |
]
|
2445 |
+
},
|
2446 |
+
{
|
2447 |
+
"name": "PerfXCloud",
|
2448 |
+
"logo": "",
|
2449 |
+
"tags": "LLM,TEXT EMBEDDING",
|
2450 |
+
"status": "1",
|
2451 |
+
"llm": [
|
2452 |
+
{
|
2453 |
+
"llm_name": "deepseek-v2-chat",
|
2454 |
+
"tags": "LLM,CHAT,4k",
|
2455 |
+
"max_tokens": 4096,
|
2456 |
+
"model_type": "chat"
|
2457 |
+
},
|
2458 |
+
{
|
2459 |
+
"llm_name": "llama3.1:405b",
|
2460 |
+
"tags": "LLM,CHAT,128k",
|
2461 |
+
"max_tokens": 131072,
|
2462 |
+
"model_type": "chat"
|
2463 |
+
},
|
2464 |
+
{
|
2465 |
+
"llm_name": "Qwen2-72B-Instruct",
|
2466 |
+
"tags": "LLM,CHAT,128k",
|
2467 |
+
"max_tokens": 131072,
|
2468 |
+
"model_type": "chat"
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"llm_name": "Qwen2-72B-Instruct-GPTQ-Int4",
|
2472 |
+
"tags": "LLM,CHAT,2k",
|
2473 |
+
"max_tokens": 2048,
|
2474 |
+
"model_type": "chat"
|
2475 |
+
},
|
2476 |
+
{
|
2477 |
+
"llm_name": "Qwen2-72B-Instruct-awq-int4",
|
2478 |
+
"tags": "LLM,CHAT,32k",
|
2479 |
+
"max_tokens": 32768,
|
2480 |
+
"model_type": "chat"
|
2481 |
+
},
|
2482 |
+
{
|
2483 |
+
"llm_name": "Llama3-Chinese_v2",
|
2484 |
+
"tags": "LLM,CHAT,8k",
|
2485 |
+
"max_tokens": 8192,
|
2486 |
+
"model_type": "chat"
|
2487 |
+
},
|
2488 |
+
{
|
2489 |
+
"llm_name": "Yi-1_5-9B-Chat-16K",
|
2490 |
+
"tags": "LLM,CHAT,16k",
|
2491 |
+
"max_tokens": 16384,
|
2492 |
+
"model_type": "chat"
|
2493 |
+
},
|
2494 |
+
{
|
2495 |
+
"llm_name": "Qwen1.5-72B-Chat-GPTQ-Int4",
|
2496 |
+
"tags": "LLM,CHAT,2k",
|
2497 |
+
"max_tokens": 2048,
|
2498 |
+
"model_type": "chat"
|
2499 |
+
},
|
2500 |
+
{
|
2501 |
+
"llm_name": "Meta-Llama-3.1-8B-Instruct",
|
2502 |
+
"tags": "LLM,CHAT,4k",
|
2503 |
+
"max_tokens": 4096,
|
2504 |
+
"model_type": "chat"
|
2505 |
+
},
|
2506 |
+
{
|
2507 |
+
"llm_name": "Qwen2-7B-Instruct",
|
2508 |
+
"tags": "LLM,CHAT,32k",
|
2509 |
+
"max_tokens": 32768,
|
2510 |
+
"model_type": "chat"
|
2511 |
+
},
|
2512 |
+
{
|
2513 |
+
"llm_name": "deepseek-v2-lite-chat",
|
2514 |
+
"tags": "LLM,CHAT,2k",
|
2515 |
+
"max_tokens": 2048,
|
2516 |
+
"model_type": "chat"
|
2517 |
+
},
|
2518 |
+
{
|
2519 |
+
"llm_name": "Qwen2-7B",
|
2520 |
+
"tags": "LLM,CHAT,128k",
|
2521 |
+
"max_tokens": 131072,
|
2522 |
+
"model_type": "chat"
|
2523 |
+
},
|
2524 |
+
{
|
2525 |
+
"llm_name": "chatglm3-6b",
|
2526 |
+
"tags": "LLM,CHAT,8k",
|
2527 |
+
"max_tokens": 8192,
|
2528 |
+
"model_type": "chat"
|
2529 |
+
},
|
2530 |
+
{
|
2531 |
+
"llm_name": "Meta-Llama-3-70B-Instruct-GPTQ-Int4",
|
2532 |
+
"tags": "LLM,CHAT,1k",
|
2533 |
+
"max_tokens": 1024,
|
2534 |
+
"model_type": "chat"
|
2535 |
+
},
|
2536 |
+
{
|
2537 |
+
"llm_name": "Meta-Llama-3-8B-Instruct",
|
2538 |
+
"tags": "LLM,CHAT,8k",
|
2539 |
+
"max_tokens": 8192,
|
2540 |
+
"model_type": "chat"
|
2541 |
+
},
|
2542 |
+
{
|
2543 |
+
"llm_name": "Mistral-7B-Instruct",
|
2544 |
+
"tags": "LLM,CHAT,32k",
|
2545 |
+
"max_tokens": 32768,
|
2546 |
+
"model_type": "chat"
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"llm_name": "MindChat-Qwen-7B-v2",
|
2550 |
+
"tags": "LLM,CHAT,2k",
|
2551 |
+
"max_tokens": 2048,
|
2552 |
+
"model_type": "chat"
|
2553 |
+
},
|
2554 |
+
{
|
2555 |
+
"llm_name": "phi-2",
|
2556 |
+
"tags": "LLM,CHAT,2k",
|
2557 |
+
"max_tokens": 2048,
|
2558 |
+
"model_type": "chat"
|
2559 |
+
},
|
2560 |
+
{
|
2561 |
+
"llm_name": "SOLAR-10_7B-Instruct",
|
2562 |
+
"tags": "LLM,CHAT,4k",
|
2563 |
+
"max_tokens": 4096,
|
2564 |
+
"model_type": "chat"
|
2565 |
+
},
|
2566 |
+
{
|
2567 |
+
"llm_name": "Mixtral-8x7B-Instruct-v0.1-GPTQ",
|
2568 |
+
"tags": "LLM,CHAT,32k",
|
2569 |
+
"max_tokens": 32768,
|
2570 |
+
"model_type": "chat"
|
2571 |
+
},
|
2572 |
+
{
|
2573 |
+
"llm_name": "Qwen1.5-7B",
|
2574 |
+
"tags": "LLM,CHAT,32k",
|
2575 |
+
"max_tokens": 32768,
|
2576 |
+
"model_type": "chat"
|
2577 |
+
},
|
2578 |
+
{
|
2579 |
+
"llm_name": "BAAI/bge-large-en-v1.5",
|
2580 |
+
"tags": "TEXT EMBEDDING",
|
2581 |
+
"max_tokens": 512,
|
2582 |
+
"model_type": "embedding"
|
2583 |
+
},
|
2584 |
+
{
|
2585 |
+
"llm_name": "BAAI/bge-large-zh-v1.5",
|
2586 |
+
"tags": "TEXT EMBEDDING",
|
2587 |
+
"max_tokens": 1024,
|
2588 |
+
"model_type": "embedding"
|
2589 |
+
},
|
2590 |
+
{
|
2591 |
+
"llm_name": "BAAI/bge-m3",
|
2592 |
+
"tags": "TEXT EMBEDDING",
|
2593 |
+
"max_tokens": 8192,
|
2594 |
+
"model_type": "embedding"
|
2595 |
+
}
|
2596 |
+
]
|
2597 |
+
}
|
2598 |
]
|
2599 |
}
|
rag/llm/__init__.py
CHANGED
@@ -38,7 +38,8 @@ EmbeddingModel = {
|
|
38 |
"NVIDIA": NvidiaEmbed,
|
39 |
"LM-Studio": LmStudioEmbed,
|
40 |
"OpenAI-API-Compatible": OpenAI_APIEmbed,
|
41 |
-
"cohere": CoHereEmbed
|
|
|
42 |
}
|
43 |
|
44 |
|
@@ -84,7 +85,8 @@ ChatModel = {
|
|
84 |
"LM-Studio": LmStudioChat,
|
85 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
86 |
"cohere": CoHereChat,
|
87 |
-
"LeptonAI": LeptonAIChat
|
|
|
88 |
}
|
89 |
|
90 |
|
|
|
38 |
"NVIDIA": NvidiaEmbed,
|
39 |
"LM-Studio": LmStudioEmbed,
|
40 |
"OpenAI-API-Compatible": OpenAI_APIEmbed,
|
41 |
+
"cohere": CoHereEmbed,
|
42 |
+
"PerfXCloud": PerfXCloudEmbed,
|
43 |
}
|
44 |
|
45 |
|
|
|
85 |
"LM-Studio": LmStudioChat,
|
86 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
87 |
"cohere": CoHereChat,
|
88 |
+
"LeptonAI": LeptonAIChat,
|
89 |
+
"PerfXCloud": PerfXCloudChat
|
90 |
}
|
91 |
|
92 |
|
rag/llm/chat_model.py
CHANGED
@@ -987,4 +987,11 @@ class LeptonAIChat(Base):
|
|
987 |
def __init__(self, key, model_name, base_url=None):
|
988 |
if not base_url:
|
989 |
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
|
990 |
-
super().__init__(key, model_name, base_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
987 |
def __init__(self, key, model_name, base_url=None):
|
988 |
if not base_url:
|
989 |
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
|
990 |
+
super().__init__(key, model_name, base_url)
|
991 |
+
|
992 |
+
|
993 |
+
class PerfXCloudChat(Base):
|
994 |
+
def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
|
995 |
+
if not base_url:
|
996 |
+
base_url = "https://cloud.perfxlab.cn/v1"
|
997 |
+
super().__init__(key, model_name, base_url)
|
rag/llm/embedding_model.py
CHANGED
@@ -553,3 +553,10 @@ class CoHereEmbed(Base):
|
|
553 |
return np.array([d for d in res.embeddings.float]), int(
|
554 |
res.meta.billed_units.input_tokens
|
555 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
553 |
return np.array([d for d in res.embeddings.float]), int(
|
554 |
res.meta.billed_units.input_tokens
|
555 |
)
|
556 |
+
|
557 |
+
|
558 |
+
class PerfXCloudEmbed(OpenAIEmbed):
|
559 |
+
def __init__(self, key, model_name, base_url="https://cloud.perfxlab.cn/v1"):
|
560 |
+
if not base_url:
|
561 |
+
base_url = "https://cloud.perfxlab.cn/v1"
|
562 |
+
super().__init__(key, model_name, base_url)
|
web/src/assets/svg/llm/perfx-cloud.svg
ADDED
|
web/src/pages/user-setting/setting-model/constant.ts
CHANGED
@@ -25,6 +25,7 @@ export const IconMap = {
|
|
25 |
'OpenAI-API-Compatible': 'openai-api',
|
26 |
cohere: 'cohere',
|
27 |
Lepton: 'lepton',
|
|
|
28 |
};
|
29 |
|
30 |
export const BedrockRegionList = [
|
|
|
25 |
'OpenAI-API-Compatible': 'openai-api',
|
26 |
cohere: 'cohere',
|
27 |
Lepton: 'lepton',
|
28 |
+
PerfXCloud: 'perfx-cloud',
|
29 |
};
|
30 |
|
31 |
export const BedrockRegionList = [
|