import re
import spaces
import gradio as gr
import torch
# Earlier loading approaches, kept for reference:

# Option 1: 4-bit quantized load of the full model
# from peft import PeftModel, PeftConfig
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16)
# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/BhashiniLLM",
#                                              device_map="auto",
#                                              quantization_config=quantization_config,
#                                              torch_dtype=torch.float16,
#                                              low_cpu_mem_usage=True,
#                                              use_safetensors=True)

# Option 2: load the base model, then apply the LoRA adapter with PEFT
# # Assuming you have your HF repository in this format: "your_username/your_model_name"
# model_id = "FlawedLLM/BhashiniLLM"
# # Load the base model (the one you fine-tuned with LoRA)
# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
# for param in base_model.parameters():
#     param.data = param.data.to(torch.float16)  # or torch.float32
# # Load the LoRA adapter weights
# model = PeftModel.from_pretrained(base_model, model_id)
# tokenizer = AutoTokenizer.from_pretrained(model_id)

# Option 3: single-call 4-bit load
# model = AutoModel.from_pretrained("FlawedLLM/Bhashini", load_in_4bit=True, device_map='auto')

# Option 4: AutoPeftModelForCausalLM (not recommended - use Unsloth if possible)
# from peft import AutoPeftModelForCausalLM
# from transformers import AutoTokenizer
# model = AutoPeftModelForCausalLM.from_pretrained(
#     "FlawedLLM/Bhashini",  # the model you used for training
#     load_in_4bit=True,
# )
# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load weights in 4-bit to cut GPU memory
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    "FlawedLLM/Bhashini_9",
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
    device_map="auto",
)
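
# Optional sanity check, not required for the app: confirm the 4-bit load
# shrank the weights. get_memory_footprint() is a standard method on
# transformers PreTrainedModel instances.
# print(f"Model footprint: {model.get_memory_footprint() / 1e9:.2f} GB")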
@spaces.GPU(duration=300)
def chunk_it(input_command):
    # Alpaca-style prompt: instruction, input, and an empty response slot for generation.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                '''
You will receive text input that you need to analyze to perform the following tasks:

transaction: Record the details of an item transaction.
last n days transactions: Retrieve transaction records for a specified time period.
view risk inventory: View inventory items based on a risk category.
view inventory: View inventory details.
new items: Add new items to the inventory.
old items: View old items in inventory.
report generation: Generate various inventory reports.

Required Parameters:
Each task requires specific parameters to execute correctly:

transaction:
    ItemName (string)
    ItemQt (quantity - integer)
    Flow (string: "in" or "out")
    ShelfNo (string or integer)
last n days transactions:
    ItemName (string)
    Duration (integer: number of days, default: 30)
view risk inventory:
    RiskType (string: "overstock", "understock", or Null for all risk types)
view inventory:
    ItemName (string)
    ShelfNo (string or integer)
new items:
    ItemName (string)
    SellingPrice (number)
    CostPrice (number)
old items:
    ShelfNo (string or integer)
report generation:
    ItemName (string)
    Duration (integer: number of days, default: 6)
    ReportType (string: "profit", "revenue", "inventory", or Null for all reports)

ALWAYS provide output in a JSON format.''',  # instruction
                input_command,  # input
                "",  # output - leave this blank for generation!
            )
        ],
        return_tensors="pt",
    ).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
    reply = tokenizer.batch_decode(outputs)[0]

    # Extract the content between "### Response:" and "<|end_of_text|>";
    # re.DOTALL lets '.' match the newlines inside the response.
    pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
    match = re.search(pattern, reply, re.DOTALL)
    if match is None:
        # Fall back to the raw decode if the expected markers are missing.
        return reply
    return match.group(1).strip()
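
# Example (hypothetical input and output - the exact JSON the model emits
# depends on the fine-tuned weights):
#   chunk_it("sold 5 bags of rice from shelf A2")
#   -> '{"task": "transaction", "ItemName": "rice", "ItemQt": 5, "Flow": "out", "ShelfNo": "A2"}'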
iface = gr.Interface(
    fn=chunk_it,
    inputs="text",
    outputs="text",
    title="Formatter_Pro",
)
iface.launch(inline=False)