File size: 4,299 Bytes
686b9bf
55f4e8e
ad4a2a0
 
55f4e8e
 
5f52293
686b9bf
c974ae6
 
 
 
 
 
 
 
 
 
 
 
55f4e8e
 
686b9bf
 
 
 
 
 
 
 
 
 
 
 
 
55f4e8e
c974ae6
5f52293
4a19484
686b9bf
5f52293
 
 
 
55f4e8e
5f52293
 
 
 
 
 
 
55f4e8e
5f52293
 
55f4e8e
5f52293
55f4e8e
 
 
 
5f52293
55f4e8e
 
5f52293
55f4e8e
5f52293
55f4e8e
5f52293
55f4e8e
 
 
5f52293
55f4e8e
 
 
 
5f52293
c974ae6
5f52293
 
 
 
55f4e8e
 
 
 
c974ae6
 
 
 
 
 
686b9bf
55f4e8e
686b9bf
 
 
 
5f52293
686b9bf
 
c974ae6
686b9bf
55f4e8e
5f52293
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import re  # used by chunk_it to extract the "### Response:" section from generations

import torch

import gradio as gr
import spaces
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hugging Face access token; raises KeyError at startup if not configured.
HF_TOKEN = os.environ["HF_TOKEN"]

# Module-level setup: load tokenizer and the 4-bit quantized base model once at
# import time so every Gradio request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters", trust_remote_code=True)
# NF4 double-quantized 4-bit weights with fp16 compute — keeps the 8B model
# within typical Space GPU memory limits.
quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",  
        bnb_4bit_compute_dtype=torch.float16)
# NOTE(review): torch_dtype=bfloat16 here while bnb_4bit_compute_dtype is
# float16 above — presumably intentional, but worth confirming they should differ.
model = AutoModelForCausalLM.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters", 
                                             quantization_config=quantization_config, 
                                             torch_dtype =torch.bfloat16, 
                                             low_cpu_mem_usage=True, 
                                             use_safetensors=True,
                                             trust_remote_code=True)

# Instruction suffix appended to every prompt to force JSON-formatted output.
condition = '''
ALWAYS provide output in a JSON format.
'''
# Alpaca-style prompt template: instruction, input, and a blank response slot
# that the model is expected to complete.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


@spaces.GPU()
def chunk_it(inventory_list, user_input_text):
    """Classify a user's inventory request into a task + parameters via the LLM.

    Builds an Alpaca-style prompt embedding the task/parameter instructions,
    the caller-supplied inventory list, and the user's text, generates a
    completion, and returns the text after "### Response:".

    Args:
        inventory_list: Newline-separated item names appended to the instruction
            so the model can match ItemName values against them.
        user_input_text: The raw user request to analyze.

    Returns:
        The model's response section as a string (expected to be JSON per the
        `condition` instruction); falls back to the full decoded generation if
        the response markers cannot be located.
    """
    # NOTE(review): moving an already-quantized bitsandbytes model with .to()
    # is usually a no-op/unsupported; kept for parity with the Space runtime.
    model.to('cuda')

    instruction = '''
                You will receive text input that you need to analyze to perform the following tasks:

                transaction: Record the details of an item transaction.
                last n days transactions: Retrieve transaction records for a specified time period.
                view risk inventory: View inventory items based on a risk category.
                view inventory: View inventory details.
                new items: Add new items to the inventory.
                report generation: Generate various inventory reports.
                delete item: Delete an existing Item.

                Required Parameters:
                Each task requires specific parameters to execute correctly:

                transaction:
                  ItemName (string)
                  ItemQt (quantity - integer)
                  Type (string: "sale" or "purchase" or "return")
                  ReorderPoint (integer)
                last n days transactions:
                  ItemName (string)
                  Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
                view risk inventory:
                  RiskType (string: "overstock", "understock", or "Null" for all risk types)
                view inventory:
                  ItemName (string)
                new items:
                  ItemName (string)
                  SellingPrice (number)
                  CostPrice (number)
                report generation:
                  ItemName (string)
                  Duration (integer: number of days, if user input is in weeks, months or years then convert to days)
                  ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)

                The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
                ''' + inventory_list + condition

    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,      # instruction
                user_input_text,  # input
                "",               # output - leave this blank for generation!
            )
        ], return_tensors="pt").to("cuda")

    # Generation with a longer max_length and better sampling
    outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)

    reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # Pull out only the text between the response header and the EOS marker.
    pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
    match = re.search(pattern, reply[0], re.DOTALL)  # re.DOTALL allows '.' to match newlines
    if match is None:
        # The model may exhaust max_new_tokens before emitting <|end_of_text|>;
        # return the full decoded text rather than crashing with AttributeError.
        return reply[0].strip()
    return match.group(1).strip()

# Interface for inputs.
# Gradio maps `inputs` to `fn` parameters POSITIONALLY: chunk_it's signature is
# (inventory_list, user_input_text), so the inventory textbox must come first.
# The original order fed the user's query into `inventory_list` and vice-versa.
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs="text",
    title="Bhashini_Ki",
)

iface.launch(inline=False)