test
- app.py +10 -26
- bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl +0 -3
- requirements.txt +2 -5
app.py
CHANGED
@@ -1,19 +1,11 @@
 import gradio as gr
-from
-from transformers import TextStreamer
-
-# Load model directly
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import InferenceClient
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-Info of how to use a model after training on hf https://huggingface.co/docs/trl/main/en/use_model
 """
 
-
-model = AutoModelForCausalLM.from_pretrained(peft_model_id)
-
-tokenizer = AutoTokenizer.from_pretrained("samlama111/lora_model")
+client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
 
 
 def respond(
@@ -36,24 +28,16 @@ def respond(
 
     response = ""
 
-    inputs = tokenizer.apply_chat_template(
-        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
-    )
-
-    text_streamer = TextStreamer(tokenizer)
     # TODO: Doesn't stream ATM
-    for message in
-
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
     ):
-
-
-
-        # Manually getting the response
-        response = decoded_message.split("assistant")[
-            -1
-        ].strip()  # Extract only the assistant's response
-        print(response)
-
+        token = message.choices[0].delta.content
+        response += token
         yield response
 
 
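The removed branch ran the LoRA fine-tune locally. What survives of it above is incomplete: the `for message in` line is truncated, and `peft_model_id` and `decoded_message` are never defined in the visible lines (several removed lines were lost entirely). A sketch of what that path plausibly looked like when whole; the `model.generate` call and the `peft_model_id` value are assumptions, not from the commit:

# Reconstruction of the deleted local-inference path (illustrative only).
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

peft_model_id = "samlama111/lora_model"  # ASSUMPTION: never defined in the diff
model = AutoModelForCausalLM.from_pretrained(peft_model_id)  # resolving a LoRA adapter this way needs peft, hence the old requirements.txt
tokenizer = AutoTokenizer.from_pretrained("samlama111/lora_model")

messages = [{"role": "user", "content": "Hello!"}]
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)

# TextStreamer prints tokens to stdout as they arrive; it never hands them
# back to Python code, which is consistent with the "Doesn't stream ATM" TODO.
text_streamer = TextStreamer(tokenizer)
output_ids = model.generate(inputs, streamer=text_streamer, max_new_tokens=128)  # ASSUMPTION: the truncated loop wrapped a generate() call

# "Manually getting the response": keep only the text after the final
# "assistant" role marker that the chat template leaves in the decoded string.
decoded_message = tokenizer.decode(output_ids[0], skip_special_tokens=True)
response = decoded_message.split("assistant")[-1].strip()
print(response)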
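On the replacement path, `InferenceClient.chat_completion(..., stream=True)` returns an iterator of chunks, and each chunk's `choices[0].delta.content` holds only the newly generated text. The diff hides the `respond` signature and the UI wiring; a self-contained sketch of how the pieces typically fit together, assuming the stock Spaces chat template (tuple-style history, `gr.ChatInterface`):

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

def respond(message, history, max_tokens=512, temperature=0.7, top_p=0.95):
    # Rebuild the conversation in the OpenAI-style format chat_completion expects.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # guard added here: the final chunk's delta can be None
            response += token
        yield response  # yielding the growing string is what streams into Gradio

demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()

Because `respond` is a generator, Gradio re-renders the chat bubble on every yield, so the partial response appears token by token.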
bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:18b28598f8ef7a07c1784b0a52480b8c64e15918ac752964ac0ca2085953b78c
-size 1811514
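The deleted file is not the wheel itself but a Git LFS pointer: the Hub stores large binaries through LFS, so git tracks only this three-line stub giving the spec version, the SHA-256 of the real blob, and its size in bytes (about 1.8 MB here). A quick way to recognize one, as an illustrative sketch:

# Illustrative check: parse the three "key value" lines of an LFS pointer.
from pathlib import Path

text = Path("bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl").read_text()
fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
assert fields["version"].startswith("https://git-lfs.github.com/spec/")
print(fields["oid"], fields["size"])  # sha256:18b2...  1811514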
requirements.txt
CHANGED
@@ -1,6 +1,3 @@
-# huggingface_hub==0.25.2
 huggingface_hub
-
-
-peft
-/tmp/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl
+unsloth
+gradio
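Net effect on dependencies: the old file pulled `bitsandbytes` from the pre-built dev wheel shipped in the repo (via its /tmp path) alongside `peft`, both needed only for the local model-loading path; the new file is pure PyPI. `gradio` is now listed explicitly, and `unsloth` is added even though the rewritten app.py above imports only `gradio` and `huggingface_hub`.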