import os

import torch
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, PeftModel

# Token for the gated Llama 3 base model; must be set in the Space's secrets
HF_TOKEN = os.environ["HF_TOKEN"]
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
# Configuration for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",       # NormalFloat4, suited to normally distributed weights
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for matmuls at inference
)
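# Rough footprint, assuming ~8e9 parameters: 4-bit weights come to about 4 GB,
# plus overhead for quantization constants and activations, so the model fits
# on a single 16 GB GPU.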
# Load the base model with 4-bit quantization
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place the quantized weights on the GPU
    token=HF_TOKEN,     # authentication for the gated repo
)
# Apply LoRA adapters; passing this config explicitly overrides the
# adapter_config.json stored in the adapter repo
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0,
    bias="none",
    task_type="CAUSAL_LM",
)
model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=peft_config)
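# The adapters are kept separate rather than merged: PEFT's merge_and_unload()
# would need the 4-bit base weights dequantized first, so leaving them split is
# the simpler option for inference here.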
# Standing instruction appended to every prompt
condition = '''
ALWAYS provide output in a JSON format.
'''

# Alpaca-style prompt template with slots for instruction, input, and response
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
@spaces.GPU(duration=300)  # request a ZeroGPU worker for up to 300 s per call
def chunk_it(inventory_list, user_input_text):
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                '''
You will receive text input that you need to analyze to perform the following tasks:
transaction: Record the details of an item transaction.
last n days transactions: Retrieve transaction records for a specified time period.
view risk inventory: View inventory items based on a risk category.
view inventory: View inventory details.
new items: Add new items to the inventory.
report generation: Generate various inventory reports.
delete item: Delete an existing item.
Required Parameters:
Each task requires specific parameters to execute correctly:
transaction:
ItemName (string)
ItemQt (quantity - integer)
Type (string: "sale" or "purchase" or "return")
ReorderPoint (integer)
last n days transactions:
ItemName (string)
Duration (integer: number of days; convert weeks, months, or years to days)
view risk inventory:
RiskType (string: "overstock", "understock", or "Null" for all risk types)
view inventory:
ItemName (string)
new items:
ItemName (string)
SellingPrice (number)
CostPrice (number)
report generation:
ItemName (string)
Duration (integer: number of days; convert weeks, months, or years to days)
ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
''' + inventory_list + condition,  # instruction
                user_input_text,  # input
                "",  # response slot left blank for generation
            )
        ],
        return_tensors="pt",
    ).to("cuda")
    # Greedy decoding with KV caching; max_new_tokens caps the JSON response length
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
    # Decode only the newly generated tokens so the long prompt is not echoed back
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
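# Hypothetical local smoke test (needs a GPU; the item names are made up):
# print(chunk_it("Widget, Gadget, Sprocket", "record a sale of 5 Widgets"))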
# Gradio UI; inputs are passed to chunk_it positionally, so their order must
# match the function signature (inventory_list first, then user_input_text)
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs="text",
    title="Formatter Pro",
)
iface.launch(inline=False)