EPark25 committed on
Commit
db2ba20
·
1 Parent(s): 61eea9e
app.py CHANGED
@@ -1,19 +1,11 @@
1
  import gradio as gr
2
- from peft import PeftModel
3
- from transformers import TextStreamer
4
-
5
- # Load model directly
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  """
9
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
10
- Info of how to use a model after training on hf https://huggingface.co/docs/trl/main/en/use_model
11
  """
12
 
13
- peft_model_id = "samlama111/lora_model"
14
- model = AutoModelForCausalLM.from_pretrained(peft_model_id)
15
-
16
- tokenizer = AutoTokenizer.from_pretrained("samlama111/lora_model")
17
 
18
 
19
  def respond(
@@ -36,24 +28,16 @@ def respond(
36
 
37
  response = ""
38
 
39
- inputs = tokenizer.apply_chat_template(
40
- messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
41
- )
42
-
43
- text_streamer = TextStreamer(tokenizer)
44
  # TODO: Doesn't stream ATM
45
- for message in model.generate(
46
- input_ids=inputs, streamer=text_streamer, max_new_tokens=1024, use_cache=True
 
 
 
 
47
  ):
48
- # Decode the tensor to a string
49
- decoded_message = tokenizer.decode(message, skip_special_tokens=True)
50
-
51
- # Manually getting the response
52
- response = decoded_message.split("assistant")[
53
- -1
54
- ].strip() # Extract only the assistant's response
55
- print(response)
56
-
57
  yield response
58
 
59
 
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
 
 
 
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
6
  """
7
 
8
+ client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
 
 
 
9
 
10
 
11
  def respond(
 
28
 
29
  response = ""
30
 
 
 
 
 
 
31
  # TODO: Doesn't stream ATM
32
+ for message in client.chat_completion(
33
+ messages,
34
+ max_tokens=max_tokens,
35
+ stream=True,
36
+ temperature=temperature,
37
+ top_p=top_p,
38
  ):
39
+ token = message.choices[0].delta.content
40
+ response += token
 
 
 
 
 
 
 
41
  yield response
42
 
43
 
bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:18b28598f8ef7a07c1784b0a52480b8c64e15918ac752964ac0ca2085953b78c
3
- size 1811514
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,3 @@
1
- # huggingface_hub==0.25.2
2
  huggingface_hub
3
- transformers>=4.45.1
4
- torch
5
- peft
6
- /tmp/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl
 
 
1
  huggingface_hub
2
+ unsloth
3
+ gradio