freddyaboulton HF staff committed on
Commit
4434826
·
verified ·
1 Parent(s): cce1163

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +7 -7
  2. requirements.txt +4 -0
  3. run.ipynb +1 -0
  4. run.py +21 -0
README.md CHANGED
@@ -1,12 +1,12 @@
 
1
  ---
2
- title: Llm Hf Transformers Main
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.7.0
8
- app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  ---
3
+ title: llm_hf_transformers_main
4
+ emoji: 🔥
5
+ colorFrom: indigo
6
+ colorTo: indigo
7
  sdk: gradio
8
  sdk_version: 5.7.0
9
+ app_file: run.py
10
  pinned: false
11
+ hf_oauth: true
12
  ---
 
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio-client @ git+https://github.com/gradio-app/gradio@afd75def9e979d9b255a6d22d33a3aee63b6f225#subdirectory=client/python
2
+ https://gradio-pypi-previews.s3.amazonaws.com/afd75def9e979d9b255a6d22d33a3aee63b6f225/gradio-5.7.0-py3-none-any.whl
3
+ transformers>=4.46.0
4
+ torch>=2.3.1
run.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: llm_hf_transformers"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio transformers>=4.46.0 torch>=2.3.1 "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from transformers import AutoModelForCausalLM, AutoTokenizer\n", "import gradio as gr\n", "\n", "checkpoint = \"HuggingFaceTB/SmolLM2-135M-Instruct\"\n", "device = \"cpu\" # \"cuda\" or \"cpu\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)\n", "\n", "def predict(message, history):\n", " history.append({\"role\": \"user\", \"content\": message})\n", " input_text = tokenizer.apply_chat_template(history, tokenize=False)\n", " inputs = tokenizer.encode(input_text, return_tensors=\"pt\").to(device) # type: ignore\n", " outputs = model.generate(inputs, max_new_tokens=100, temperature=0.2, top_p=0.9, do_sample=True)\n", " decoded = tokenizer.decode(outputs[0])\n", " response = decoded.split(\"<|im_start|>assistant\\n\")[-1].split(\"<|im_end|>\")[0]\n", " return response\n", "\n", "demo = gr.ChatInterface(predict, type=\"messages\")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
run.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ import gradio as gr
3
+
4
+ checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
5
+ device = "cpu" # "cuda" or "cpu"
6
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
7
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
8
+
9
def predict(message, history):
    """Generate the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: Text of the latest user message.
        history: Prior conversation turns as a list of dicts with ``"role"``
            and ``"content"`` keys (the ``type="messages"`` chat format).
            The list is not modified.

    Returns:
        The decoded text of the newly generated assistant turn.
    """
    # Build a new list rather than appending in place, so the history object
    # owned by the caller is left untouched.
    messages = [*history, {"role": "user", "content": message}]
    # add_generation_prompt=True makes the rendered prompt end with the
    # assistant header, so the model answers as the assistant instead of
    # having to produce that header on its own.
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)  # type: ignore
    outputs = model.generate(
        inputs, max_new_tokens=100, temperature=0.2, top_p=0.9, do_sample=True
    )
    # generate() returns the prompt followed by the continuation; slice off
    # the prompt tokens and drop special tokens instead of splitting the full
    # decoded transcript on literal chat-template markers.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
17
+
18
+ demo = gr.ChatInterface(predict, type="messages")
19
+
20
+ if __name__ == "__main__":
21
+ demo.launch()