import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, PeftModel, get_peft_model
import gradio as gr

# Tokenizer shipped alongside the LoRA adapter repository.
tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")

# 4-bit NF4 quantization with nested (double) quantization; matmuls computed
# in bfloat16 to keep quality while fitting the 8B model on a single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Base model loaded quantized; the LoRA adapters are applied on top below.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    quantization_config=bnb_config,
)

# LoRA hyper-parameters. NOTE(review): PeftModel.from_pretrained normally
# reads the adapter config from the repo itself; passing `config=` here
# overrides it — confirm these values match the trained adapters.
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout=0,
    bias="none",
    task_type="CAUSAL_LM",
)
model = PeftModel.from_pretrained(
    base_model,
    "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters",
    config=peft_config,
)

condition = '''
ALWAYS provide output in a JSON format.
'''

# Standard Alpaca-style prompt: instruction / input / response slots.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


@spaces.GPU(duration=300)
def chunk_it(inventory_list, user_input_text):
    """Run the fine-tuned model on a user query against an inventory list.

    Parameters
    ----------
    inventory_list : str
        Newline/comma separated item names the model may match against
        (spliced verbatim into the instruction prompt).
    user_input_text : str
        The user's natural-language request.

    Returns
    -------
    str
        The decoded model output. Note: includes the prompt text as well,
        since the generated ids are decoded without slicing off the input.
    """
    # Task description + parameter schema for the structured-JSON extraction.
    instruction = '''
You will receive text input that you need to analyze to perform the following tasks:

transaction: Record the details of an item transaction.
last n days transactions: Retrieve transaction records for a specified time period.
view risk inventory: View inventory items based on a risk category.
view inventory: View inventory details.
new items: Add new items to the inventory.
report generation: Generate various inventory reports.
delete item: Delete an existing Item.

Required Parameters:
Each task requires specific parameters to execute correctly:

transaction:
    ItemName (string)
    ItemQt (quantity - integer)
    Type (string: "sale" or "purchase" or "return")
    ReorderPoint (integer)
last n days transactions:
    ItemName (string)
    Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
view risk inventory:
    RiskType (string: "overstock", "understock", or "Null" for all risk types)
view inventory:
    ItemName (string)
new items:
    ItemName (string)
    SellingPrice (number)
    CostPrice (number)
report generation:
    ItemName (string)
    Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
    ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)

The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
''' + inventory_list + '''
ALWAYS provide output in a JSON format.
'''

    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,        # instruction
                user_input_text,    # input
                "",                 # output - leave blank for generation!
            )
        ],
        return_tensors="pt",
    ).to("cuda")

    # Greedy-ish generation with KV-cache; 216 new tokens is ample for the
    # JSON responses this task produces.
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
    content = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return content[0]


# Gradio passes component values to `fn` POSITIONALLY, so the inputs list
# must follow the `chunk_it(inventory_list, user_input_text)` signature.
# (Bug fix: the components were previously listed in the reverse order,
# feeding the user's query into the inventory parameter and vice versa.)
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs="text",
    title="Formatter Pro",
)
iface.launch(inline=False)