add demo
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Salamandra On
-emoji:
+title: Salamandra On-Device
+emoji: 📲🦎
 colorFrom: indigo
 colorTo: yellow
 sdk: gradio
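For reference, a complete Spaces card for this demo would look roughly like the block below. The title, emoji, colorFrom, colorTo, and sdk values come from the diff; sdk_version, app_file, and pinned are illustrative placeholders, not values recorded in this commit.

---
title: Salamandra On-Device
emoji: 📲🦎
colorFrom: indigo
colorTo: yellow
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
---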
app.py CHANGED
@@ -1,5 +1,7 @@
 import gradio as gr
 from transformers import pipeline, set_seed
+from transformers import pipeline, set_seed, AutoTokenizer, AutoModelForCausalLM
+
 import torch
 
 description = "The models are intended for both research and commercial use in any of the languages included in the training data. The base models are intended either for language generation or to be further fine-tuned for specific use-cases. The instruction-tuned variants can be used as general-purpose assistants, as long as the user is fully aware of the model’s limitations."
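Note that this hunk adds a second transformers import rather than editing the existing one, so both lines remain in the file. That is harmless, since the second import simply re-binds the same names and adds two more, but a single consolidated line would read:

from transformers import pipeline, set_seed, AutoTokenizer, AutoModelForCausalLM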
@@ -11,7 +13,13 @@ joinus = """
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "BSC-LT/salamandra-2b"
-
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
+generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+
+# Set pad_token_id to eos_token_id for open-end generation
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
 
 def generate_text(prompt, temperature, top_p, max_new_tokens, repetition_penalty):
     # set_seed(42)
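The pad_token_id fallback matters because many causal checkpoints ship without a dedicated padding token, and transformers otherwise warns during open-ended generation. One wrinkle worth flagging: the commit both places the model explicitly with .to(device) and passes device_map="auto" to pipeline(); when the pipeline receives an already-instantiated model, the explicit placement governs, so device_map is at best redundant here. A minimal equivalent setup, where the dtype choice is an assumption rather than part of the commit:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "BSC-LT/salamandra-2b"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# bfloat16 on GPU is an assumption to cut memory; the commit loads the default dtype
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
).to(device)

# without a pad token, generate() warns on every open-ended call
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)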
@@ -20,7 +28,8 @@ def generate_text(prompt, temperature, top_p, max_new_tokens, repetition_penalty
         "top_p": top_p,
         "max_new_tokens": max_new_tokens,
         "repetition_penalty": repetition_penalty,
-        "do_sample": True
+        "do_sample": True,
+        "pad_token_id": tokenizer.eos_token_id
     }
     output = generator(prompt, **generation_args)
     return output[0]["generated_text"]
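With do_sample enabled, temperature and top_p actually take effect (greedy decoding ignores both), and passing pad_token_id suppresses the open-end generation warning. A quick smoke test of the finished function, with the prompt and values chosen purely for illustration:

print(generate_text(
    "The three most spoken languages in Spain are",
    temperature=0.7,
    top_p=0.95,
    max_new_tokens=64,
    repetition_penalty=1.2,
))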
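The diff ends before the UI definition, but a plausible wiring of generate_text into Gradio, offered purely as a sketch since none of these components or defaults appear in the commit, would be:

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(0.1, 2.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
        gr.Slider(16, 512, value=64, step=16, label="Max new tokens"),
        gr.Slider(1.0, 2.0, value=1.2, label="Repetition penalty"),
    ],
    outputs=gr.Textbox(label="Generated text"),
    description=description,
)

demo.launch()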