Spaces:

DR-Rakshitha
/

wizardlm_api

Runtime error

App Files Files Community

DR-Rakshitha committed on Oct 1, 2023

Commit

52102b1

1 Parent(s): 6c8e87d

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -84

app.py CHANGED Viewed

@@ -5,92 +5,13 @@
 # model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
 #----------------------------------------------------------------------------------------------------------------------------
-# !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
-import torch
-# from datasets import load_dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    BitsAndBytesConfig,
-    HfArgumentParser,
-    TrainingArguments,
-    pipeline,
-    logging,
-)
-from peft import LoraConfig, PeftModel
-from trl import SFTTrainer
-# -----------------------------------------------------------------------------------------------------------------------------------------------------------------
-# LoRA attention dimension
-lora_r = 64
-# Alpha parameter for LoRA scaling
-lora_alpha = 16
-# Dropout probability for LoRA layers
-lora_dropout = 0.1
-################################################################################
-# bitsandbytes parameters
-################################################################################
-# Activate 4-bit precision base model loading
-use_4bit = True
-# Compute dtype for 4-bit base models
-bnb_4bit_compute_dtype = "float32"  # Changed to float32 for CPU compatibility
-# Quantization type (fp4 or nf4)
-bnb_4bit_quant_type = "nf4"
-# Activate nested quantization for 4-bit base models (double quantization)
-use_nested_quant = False
-# Remove device_map, as it's GPU-specific
-# device_map = {"": 0}
-# ----------------------------------------------------------------------------------------------------------------------------------------------------------------------
 model_name = "DR-DRR/Model_001"
-model_basename = "pytorch_model-00001-of-00002.bin"  # the model is in bin format
-# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# Load tokenizer and model with QLoRA configuration
-compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=use_4bit,
-    bnb_4bit_quant_type=bnb_4bit_quant_type,
-    bnb_4bit_compute_dtype=compute_dtype,
-    bnb_4bit_use_double_quant=use_nested_quant,
-    bnb_4bit_disable_gpu=True,  # Add this line to disable GPU quantization
-)
-# Remove GPU-specific check for bfloat16
-# Load base model
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    quantization_config=bnb_config,
-    # Remove device_map for CPU usage
-)
-model.config.use_cache = False
-model.config.pretraining_tp = 1
-# Load LLaMA tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training
-# Load LoRA configuration
-peft_config = LoraConfig(
-    lora_alpha=lora_alpha,
-    lora_dropout=lora_dropout,
-    r=lora_r,
-    bias="none",
-    task_type="CAUSAL_LM",
-)
 # ---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 # Ignore warnings
@@ -115,8 +36,13 @@ logging.set_verbosity(logging.CRITICAL)
 def generate_text(prompt):
     # output = model.generate(input_text)
-    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
-    result = pipe(f"<s>[INST] {prompt} [/INST]")
     return result
 text_generation_interface = gr.Interface(

 # model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
 #----------------------------------------------------------------------------------------------------------------------------
+from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "DR-DRR/Model_001"
+model = AutoModelForCausalLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# print(generated_text)
 # ---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 # Ignore warnings
 def generate_text(prompt):
     # output = model.generate(input_text)
+    # pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
+    # result = pipe(f"<s>[INST] {prompt} [/INST]")
+    # prompt = "What is a large language model?"
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    output = model.generate(input_ids, max_length=200, num_return_sequences=1)
+    result = tokenizer.decode(output[0], skip_special_tokens=True)
     return result
 text_generation_interface = gr.Interface(