BhashiniSpace_Text_gemma

Runtime error

App Files Community

FlawedLLM committed on May 23, 2024

Commit

fbeb854

verified ·

1 Parent(s): dde0367

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -33

app.py CHANGED Viewed

@@ -60,31 +60,31 @@
 # # 5. Install additional pip packages without dependencies
 # run_command("pip install --no-deps trl peft accelerate bitsandbytes")
-import subprocess
-def run_command(cmd):
-    try:
-        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-        print(result.stdout)
-    except subprocess.CalledProcessError as e:
-        print(f"Error executing command: {e.stderr}")
-# Pip install xformers
-run_command([
-    "pip",
-    "install",
-    "-U",
-    "xformers<0.0.26",
-    "--index-url",
-    "https://download.pytorch.org/whl/cu121"
-])
-# Pip install unsloth from GitHub
-run_command([
-    "pip",
-    "install",
-    "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
-])
 import os
 HF_TOKEN = os.environ["HF_TOKEN"]
@@ -92,7 +92,7 @@ import re
 import spaces
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
 # from peft import PeftModel, PeftConfig
@@ -164,14 +164,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 #                                              low_cpu_mem_usage=True,
 #                                              use_safetensors=True,
 #                                              trust_remote_code=True)
-from unsloth import FastLanguageModel
-model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final", # YOUR MODEL YOU USED FOR TRAINING
-        max_seq_length = max_seq_length,
-        dtype = dtype,
-        load_in_4bit = load_in_4bit,)
-FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):

 # # 5. Install additional pip packages without dependencies
 # run_command("pip install --no-deps trl peft accelerate bitsandbytes")
+# import subprocess
+# def run_command(cmd):
+#     try:
+#         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+#         print(result.stdout)
+#     except subprocess.CalledProcessError as e:
+#         print(f"Error executing command: {e.stderr}")
+# # Pip install xformers
+# run_command([
+#     "pip",
+#     "install",
+#     "-U",
+#     "xformers<0.0.26",
+#     "--index-url",
+#     "https://download.pytorch.org/whl/cu121"
+# ])
+# # Pip install unsloth from GitHub
+# run_command([
+#     "pip",
+#     "install",
+#     "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
+# ])
 import os
 HF_TOKEN = os.environ["HF_TOKEN"]
 import spaces
 import gradio as gr
 import torch
+# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
 # from peft import PeftModel, PeftConfig
 #                                              low_cpu_mem_usage=True,
 #                                              use_safetensors=True,
 #                                              trust_remote_code=True)
+# from unsloth import FastLanguageModel
+# model, tokenizer = FastLanguageModel.from_pretrained(
+#         model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final", # YOUR MODEL YOU USED FOR TRAINING
+#         max_seq_length = max_seq_length,
+#         dtype = dtype,
+#         load_in_4bit = load_in_4bit,)
+# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
+# Load model directly
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
+model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):