# import torch; torch.version.cuda
# # from huggingface_hub import login, HfFolder
# import subprocess
# # import getpass
# # def run_sudo_command(cmd):
# #     try:
# #         password = getpass.getpass(prompt="Enter your sudo password: ")  # Securely get the password
# #         result = subprocess.run(["sudo", "-S"] + cmd, input=password.encode(), capture_output=True, text=True, check=True)
# #         print(result.stdout)
# #     except subprocess.CalledProcessError as e:
# #         print(f"Error executing command: {e.stderr}")
# # # Run the ldconfig command
# # run_sudo_command(["ldconfig", "/usr/lib64-nvidia"])
# def run_command(cmd, shell=False):
#     """Runs a shell command and prints the output."""
#     try:
#         result = subprocess.run(cmd, shell=shell, capture_output=True, text=True, check=True)
#         print(result.stdout)
#     except subprocess.CalledProcessError as e:
#         print(f"Error executing command: {e.stderr}")
# subprocess.run(["pip", "install", "--upgrade", "pip"], check=True)
# # subprocess.run(["pip", "install", "--upgrade", "torch"], check=True)
# # subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
# # Pip install command as a list
# pip_command = [
#     "pip",
#     "install",
#     "--upgrade",
#     "--force-reinstall",
#     "--no-cache-dir",
#     "torch==2.1.1",
#     "triton",
#     "--index-url",
#     "https://download.pytorch.org/whl/cu121"
# ]
# run_command(pip_command)
# run_command(["pip", "install", "--no-deps", "trl", "peft", "accelerate", "bitsandbytes"])
# # subprocess.run(["pip", "install", "--upgrade", "peft"], check=True)
# subprocess.run(["pip", "install", "xformers"], check=True)
# # subprocess.run(["pip", "install", "--upgrade", "accelerate"], check=True)
# subprocess.run(["unsloth[cu121-ampere-torch211] @ git+https://github.com/unslothai/unsloth.git"], check=True)
# import subprocess
# # 1. Create the conda environment
# run_command(["conda", "create", "-y", "--name", "unsloth_env", "python=3.10"])
# # 2. Activate the environment (Note: Requires shell=True)
# run_command("conda activate unsloth_env", shell=True)
# # 3. Install PyTorch and related packages with conda (string commands need shell=True)
# run_command("conda install pytorch-cuda=<12.1/11.8> pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers", shell=True)
# # 4. Install unsloth from the GitHub repository with pip
# run_command("pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"", shell=True)
# # 5. Install additional pip packages without dependencies
# run_command("pip install --no-deps trl peft accelerate bitsandbytes", shell=True)
import subprocess

def run_command(cmd):
    """Runs a command and prints its output, reporting any failure."""
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {e.stderr}")
# Pip install xformers
run_command([
    "pip",
    "install",
    "-U",
    "xformers<0.0.26",
    "--index-url",
    "https://download.pytorch.org/whl/cu121"
])

# Pip install unsloth from GitHub
run_command([
    "pip",
    "install",
    "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
])
import os
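# HF_TOKEN must be provided as a Space secret; os.environ[...] raises KeyError if it is unset.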
HF_TOKEN = os.environ["HF_TOKEN"]
import re
import spaces
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
# from peft import PeftModel, PeftConfig
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16)
# config = AutoConfig.from_pretrained("FlawedLLM/Bhashini_00")
# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00",
#                                              device_map="auto",
#                                              quantization_config=quantization_config,
#                                              torch_dtype=torch.float16,
#                                              low_cpu_mem_usage=True,
#                                              use_safetensors=True,
#                                              )
# # Assuming you have your HF repository in this format: "your_username/your_model_name"
# model_id = "FlawedLLM/BhashiniLLM"
# # Load the base model (the one you fine-tuned with LoRA)
# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
# for param in base_model.parameters():
#     param.data = param.data.to(torch.float16)  # or torch.float32
# # Load the LoRA adapter weights
# model = PeftModel.from_pretrained(base_model, model_id)
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModel.from_pretrained("FlawedLLM/Bhashini", load_in_4bit=True, device_map='auto')
# I highly do NOT suggest this - use Unsloth if possible
# from peft import AutoPeftModelForCausalLM
# from transformers import AutoTokenizer
# model = AutoPeftModelForCausalLM.from_pretrained(
#     "FlawedLLM/Bhashini",  # YOUR MODEL YOU USED FOR TRAINING
#     load_in_4bit = True,
# )
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
# # Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
# config = AutoConfig.from_pretrained("FlawedLLM/Bhashini_9") # Load configuration
# # quantization_config = BitsAndBytesConfig(
# #     load_in_4bit=True,
# #     bnb_4bit_use_double_quant=True,
# #     bnb_4bit_quant_type="nf4",
# #     bnb_4bit_compute_dtype=torch.float16
# # )
# # torch_dtype = torch.float16
# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9",config=config, ignore_mismatched_sizes=True).to('cuda')
# Load model directly
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini89", trust_remote_code=True)
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16)
# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini89",
#                                              device_map="auto",
#                                              quantization_config=quantization_config,
#                                              torch_dtype=torch.float16,
#                                              low_cpu_mem_usage=True,
#                                              use_safetensors=True,
#                                              trust_remote_code=True)
from unsloth import FastLanguageModel
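# max_seq_length, dtype, and load_in_4bit are used below but defined nowhere in this
# file; the values here are assumptions based on typical Unsloth fine-tuning notebooks.
max_seq_length = 2048   # assumed context length used during training
dtype = None            # None lets Unsloth auto-detect (float16 on T4/V100, bfloat16 on Ampere+)
load_in_4bit = True     # assumed, matching the 4-bit loading used in the commented-out attempts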
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
# alpaca_prompt = You MUST copy from above!
@spaces.GPU(duration=300)
def chunk_it(input_command, item_list):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
    # An empty Gradio textbox arrives as "", not None, so test truthiness instead.
    if item_list:
        item_list = f'''The ItemName should be chosen from the given list: {item_list}, except when adding an item. If nothing SIMILAR to the ItemName is found in the list, then the ItemName should be "Null".'''
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                f'''
You will receive text input that you need to analyze to perform the following tasks:

transaction: Record the details of an item transaction.
last n days transactions: Retrieve transaction records for a specified time period.
view risk inventory: View inventory items based on a risk category.
view inventory: View inventory details.
new items: Add new items to the inventory.
old items: View old items in inventory.
report generation: Generate various inventory reports.

Required Parameters:
Each task requires specific parameters to execute correctly:

transaction:
    ItemName (string)
    ItemQt (quantity - integer)
    Type (string: "sale" or "purchase" or "return")
    ShelfNo (string or integer)
    ReorderPoint (integer)
last n days transactions:
    ItemName (string)
    Duration (integer: number of days)
view risk inventory:
    RiskType (string: "overstock", "understock", or Null for all risk types)
view inventory:
    ItemName (string)
    ShelfNo (string or integer)
new items:
    ItemName (string)
    SellingPrice (number)
    CostPrice (number)
old items:
    ShelfNo (string or integer)
report generation:
    ItemName (string)
    Duration (integer: number of days)
    ReportType (string: "profit", "revenue", "inventory", or Null for all reports)

{item_list}

ALWAYS provide output in a JSON format.''',  # instruction
                input_command,  # input
                "",  # output - leave this blank for generation!
            )
        ], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
    reply = tokenizer.batch_decode(outputs)
    # Extract the content between "### Response:" and the "<|end_of_text|>" EOS marker.
    pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
    match = re.search(pattern, reply[0], re.DOTALL)  # re.DOTALL allows '.' to match newlines
    if match:
        return match.group(1).strip()  # extract the response and trim surrounding whitespace
    return reply[0]  # fall back to the full decoded text if the pattern is not found
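# Example call (hypothetical inputs; the exact JSON returned depends on the fine-tuned model):
#   chunk_it("sold 5 packets of rice from shelf A2", "rice, wheat, sugar")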
# iface = gr.Interface(fn=chunk_it,
#                      inputs="text",
#                      outputs="text",
#                      title="Formatter_Pro",
#                      )
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="Input Command", lines=3),
        gr.Textbox(label="Item List", lines=5),
    ],
    outputs="text",
    title="Formatter Pro",
)
iface.launch(inline=False)