macadeliccc committed
Update README.md

README.md CHANGED
@@ -13,6 +13,38 @@ Trained on 2x4090 using QLoRa and FSDP

[LoRa](macadeliccc/Samantha-Qwen2-7B-LoRa)

## Launch Using vLLM

```bash
python -m vllm.entrypoints.openai.api_server \
    --model macadeliccc/Samantha-Qwen2-7B-AWQ \
    --chat-template ./examples/template_chatml.jinja \
    --quantization awq
```

```python
from openai import OpenAI

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

chat_response = client.chat.completions.create(
    model="macadeliccc/Samantha-Qwen2-7B-AWQ",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a joke."},
    ]
)
print("Chat response:", chat_response)
```
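
Not part of the commit itself, but as a usage note: once the server is up, the same OpenAI-compatible endpoint can also be exercised directly over HTTP. A minimal curl sketch, reusing the model name and default port from above:

```bash
# Send a chat completion request to the vLLM OpenAI-compatible server.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "macadeliccc/Samantha-Qwen2-7B-AWQ",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Tell me a joke."}
        ]
    }'
```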

## Ollama
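
The commit adds this section empty. Purely as a hedged sketch of how it might be filled in, assuming a GGUF export of the model exists (the filename below is hypothetical) and that the prompt format is ChatML, consistent with the template_chatml.jinja passed to vLLM above:

```bash
# Hypothetical filename: assumes the model has been exported to GGUF.
cat > Modelfile <<'EOF'
FROM ./samantha-qwen2-7b.Q4_K_M.gguf

# ChatML prompt format, matching the chat template used above.
TEMPLATE """<|im_start|>system
{{ .System }}<|im_end|>
<|im_start|>user
{{ .Prompt }}<|im_end|>
<|im_start|>assistant
"""
PARAMETER stop <|im_end|>
EOF

# Register the model with Ollama and try it out.
ollama create samantha-qwen2 -f Modelfile
ollama run samantha-qwen2 "Tell me a joke."
```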

## Prompt Template

```