BhashiniSpace_Text_gemma

Runtime error

App Files Community

FlawedLLM committed on May 23, 2024

Commit

b6eb92e

verified ·

1 Parent(s): 4daf913

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -175

app.py CHANGED Viewed

@@ -1,177 +1,9 @@
-# import torch; torch.version.cuda
-# # from huggingface_hub import login, HfFolder
-# import subprocess
-# # import getpass
-# # def run_sudo_command(cmd):
-# #     try:
-# #         password = getpass.getpass(prompt="Enter your sudo password: ")  # Securely get the password
-# #         result = subprocess.run(["sudo", "-S"] + cmd, input=password.encode(), capture_output=True, text=True, check=True)
-# #         print(result.stdout)
-# #     except subprocess.CalledProcessError as e:
-# #         print(f"Error executing command: {e.stderr}")
-# # # Run the ldconfig command
-# # run_sudo_command(["ldconfig", "/usr/lib64-nvidia"])
-# def run_command(cmd, shell=False):
-#     """Runs a shell command and prints the output."""
-#     try:
-#         result = subprocess.run(cmd, shell=shell, capture_output=True, text=True, check=True)
-#         print(result.stdout)
-#     except subprocess.CalledProcessError as e:
-#         print(f"Error executing command: {e.stderr}")
-# subprocess.run(["pip", "install", "--upgrade", "pip"], check=True)
-# # subprocess.run(["pip", "install", "--upgrade", "torch"], check=True)
-# # subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
-# # Pip install command as a list
-# pip_command = [
-#     "pip",
-#     "install",
-#     "--upgrade",
-#     "--force-reinstall",
-#     "--no-cache-dir",
-#     "torch==2.1.1",
-#     "triton",
-#     "--index-url",
-#     "https://download.pytorch.org/whl/cu121"
-# ]
-# run_command(pip_command)
-# run_command(["pip", "install", "--no-deps", "trl", "peft", "accelerate", "bitsandbytes"])
-# # subprocess.run(["pip", "install", "--upgrade", "peft"], check=True)
-# subprocess.run(["pip", "install", "xformers"], check=True)
-# # subprocess.run(["pip", "install", "--upgrade", "accelerate"], check=True)
-# subprocess.run(["unsloth[cu121-ampere-torch211] @ git+https://github.com/unslothai/unsloth.git"], check=True)
-# import subprocess
-# # 1. Create the conda environment
-# run_command(["conda", "create", "-y", "--name", "unsloth_env", "python=3.10"])
-# # 2. Activate the environment (Note: Requires shell=True)
-# run_command("conda activate unsloth_env", shell=True)
-# # 3. Install PyTorch and related packages with conda
-# run_command("conda install pytorch-cuda=<12.1/11.8> pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers")
-# # 4. Install unsloth from the GitHub repository with pip
-# run_command("pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"")
-# # 5. Install additional pip packages without dependencies
-# run_command("pip install --no-deps trl peft accelerate bitsandbytes")
-# import subprocess
-# def run_command(cmd):
-#     try:
-#         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-#         print(result.stdout)
-#     except subprocess.CalledProcessError as e:
-#         print(f"Error executing command: {e.stderr}")
-# # Pip install xformers
-# run_command([
-#     "pip",
-#     "install",
-#     "-U",
-#     "xformers<0.0.26",
-#     "--index-url",
-#     "https://download.pytorch.org/whl/cu121"
-# ])
-# # Pip install unsloth from GitHub
-# run_command([
-#     "pip",
-#     "install",
-#     "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
-# ])
 import os
 HF_TOKEN = os.environ["HF_TOKEN"]
 import re
 import spaces
 import gradio as gr
 import torch
-# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
-# from peft import PeftModel, PeftConfig
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
-# quantization_config = BitsAndBytesConfig(
-#         load_in_4bit=True,
-#         bnb_4bit_use_double_quant=True,
-#         bnb_4bit_quant_type="nf4",
-#         bnb_4bit_compute_dtype=torch.float16)
-# config=AutoConfig.from_pretrained("FlawedLLM/Bhashini_00")
-# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00",
-#                                              device_map="auto",
-#                                              quantization_config=quantization_config,
-#                                              torch_dtype =torch.float16,
-#                                              low_cpu_mem_usage=True,
-#                                              use_safetensors=True,
-#                                             )
-# # Assuming you have your HF repository in this format: "your_username/your_model_name"
-# model_id = "FlawedLLM/BhashiniLLM"
-# # Load the base model (the one you fine-tuned with LoRA)
-# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
-# for param in base_model.parameters():
-#     param.data = param.data.to(torch.float16)  # or torch.float32
-# # Load the LoRA adapter weights
-# model = PeftModel.from_pretrained(base_model, model_id)
-# tokenizer = AutoTokenizer.from_pretrained(model_id)
-# model = AutoModel.from_pretrained("FlawedLLM/Bhashini", load_in_4bit=True, device_map='auto')
-    # I highly do NOT suggest - use Unsloth if possible
-# from peft import AutoPeftModelForCausalLM
-# from transformers import AutoTokenizer
-# model = AutoPeftModelForCausalLM.from_pretrained(
-#         "FlawedLLM/Bhashini", # YOUR MODEL YOU USED FOR TRAINING
-#         load_in_4bit = True,
-#     )
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
-# # Load model directly
-# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
-# config = AutoConfig.from_pretrained("FlawedLLM/Bhashini_9")  # Load configuration
-# # quantization_config = BitsAndBytesConfig(
-# #         load_in_4bit=True,
-# #         bnb_4bit_use_double_quant=True,
-# #         bnb_4bit_quant_type="nf4",
-# #         bnb_4bit_compute_dtype=torch.float16
-# # )
-# # torch_dtype =torch.float16
-# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9",config=config, ignore_mismatched_sizes=True).to('cuda')
-# Load model directly
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini89", trust_remote_code=True)
-# quantization_config = BitsAndBytesConfig(
-#         load_in_4bit=True,
-#         bnb_4bit_use_double_quant=True,
-#         bnb_4bit_quant_type="nf4",
-#         bnb_4bit_compute_dtype=torch.float16)
-# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini89",
-#                                              device_map="auto",
-#                                              quantization_config=quantization_config,
-#                                              torch_dtype =torch.float16,
-#                                              low_cpu_mem_usage=True,
-#                                              use_safetensors=True,
-#                                              trust_remote_code=True)
-# from unsloth import FastLanguageModel
-# model, tokenizer = FastLanguageModel.from_pretrained(
-#         model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final", # YOUR MODEL YOU USED FOR TRAINING
-#         max_seq_length = max_seq_length,
-#         dtype = dtype,
-#         load_in_4bit = load_in_4bit,)
-# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-# Load model directly
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
@@ -255,13 +87,6 @@ def chunk_it(input_command, item_list):
     return reply
-# iface=gr.Interface(fn=chunk_it,
-#                   inputs="text",
-#                   inputs="text",
-#                   outputs="text",
-#                   title="Formatter_Pro",
-#                   )
 iface = gr.Interface(
     fn=chunk_it,

 import os
 HF_TOKEN = os.environ["HF_TOKEN"]
 import re
 import spaces
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
     return reply
 iface = gr.Interface(
     fn=chunk_it,