DR-Rakshitha committed on
Commit 3efd7d2 · 1 Parent(s): c13a07b

Update app.py

Files changed (1)
  1. app.py +4 -123
app.py CHANGED
@@ -1,132 +1,13 @@
- # import gradio as gr
- # from transformers import AutoModelForCausalLM, AutoTokenizer
-
  from gpt4all import GPT4All
- model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
-
- # #----------------------------------------------------------------------------------------------------------------------------
- # # !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
- # # import os
- # import torch
- # from datasets import load_dataset
- # from transformers import (
- #     AutoModelForCausalLM,
- #     AutoTokenizer,
- #     BitsAndBytesConfig,
- #     HfArgumentParser,
- #     TrainingArguments,
- #     pipeline,
- #     logging,
- # )
- # from peft import LoraConfig, PeftModel
- # from trl import SFTTrainer
- # # -----------------------------------------------------------------------------------------------------------------------------------------------------------------
-
- # # LoRA attention dimension
- # lora_r = 64
-
- # # Alpha parameter for LoRA scaling
- # lora_alpha = 16
-
- # # Dropout probability for LoRA layers
- # lora_dropout = 0.1
-
- # ################################################################################
- # # bitsandbytes parameters
- # ################################################################################
-
- # # Activate 4-bit precision base model loading
- # use_4bit = True
-
- # # Compute dtype for 4-bit base models
- # bnb_4bit_compute_dtype = "float16"
-
- # # Quantization type (fp4 or nf4)
- # bnb_4bit_quant_type = "nf4"
-
- # # Activate nested quantization for 4-bit base models (double quantization)
- # use_nested_quant = False
-
- # # Load the entire model on the GPU 0
- # device_map = {"": 0}
-
- # #----------------------------------------------------------------------------------------------------------------------------------------------------------------------
- # model_name = "DR-DRR/Model_001"
- # model_basename = "pytorch_model-00001-of-00002.bin" # the model is in bin format
 
- # #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
- # # Load tokenizer and model with QLoRA configuration
- # compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
-
- # bnb_config = BitsAndBytesConfig(
- #     load_in_4bit=use_4bit,
- #     bnb_4bit_quant_type=bnb_4bit_quant_type,
- #     bnb_4bit_compute_dtype=compute_dtype,
- #     bnb_4bit_use_double_quant=use_nested_quant,
- # )
-
- # # Check GPU compatibility with bfloat16
- # if compute_dtype == torch.float16 and use_4bit:
- #     major, _ = torch.cuda.get_device_capability()
- #     if major >= 8:
- #         print("=" * 80)
- #         print("Your GPU supports bfloat16: accelerate training with bf16=True")
- #         print("=" * 80)
-
- # # Load base model
- # model = AutoModelForCausalLM.from_pretrained(
- #     model_name,
- #     quantization_config=bnb_config,
- #     device_map=device_map
- # )
- # model.config.use_cache = False
- # model.config.pretraining_tp = 1
-
- # # Load LLaMA tokenizer
- # tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- # tokenizer.pad_token = tokenizer.eos_token
- # tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training
-
- # # Load LoRA configuration
- # peft_config = LoraConfig(
- #     lora_alpha=lora_alpha,
- #     lora_dropout=lora_dropout,
- #     r=lora_r,
- #     bias="none",
- #     task_type="CAUSAL_LM",
- # )
-
- # #---------------------------------------------------------------------------------------------------------------------------------------------------------------------
- # # Ignore warnings
- # logging.set_verbosity(logging.CRITICAL)
-
- # Run text generation pipeline with our next model
- # prompt = "What is a large language model?"
- # pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
- # result = pipe(f"<s>[INST] {prompt} [/INST]")
- # print(result[0]['generated_text'])
-
- # ---------------------------------------------------------------------------------------------------------------------------------------------------------------------
- # Ignore warnings
- # logging.set_verbosity(logging.CRITICAL)
-
- # Run text generation pipeline with our next model
- # prompt = "What is a large language model?"
- # pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
- # result = pipe(f"<s>[INST] {prompt} [/INST]")
- # print(result[0]['generated_text'])
+ # Specify the local path to the downloaded model file
+ model_path = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
 
+ # Initialize the model using the local path
+ model = GPT4All(model_path)
 
  def generate_text(prompt):
      result = model.generate(prompt)
-     # pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
-     # result = pipe(f"<s>[INST] {prompt} [/INST]")
-     # # prompt = "What is a large language model?"
-     # # input_ids = tokenizer.encode(prompt, return_tensors="pt")
-
-     # output = model.generate(input_ids, max_length=200, num_return_sequences=1)
-     # result = tokenizer.decode(output[0], skip_special_tokens=True)
      return result
 
  text_generation_interface = gr.Interface(
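
For orientation, here is a minimal sketch of what the resulting app.py could look like after this commit. Only the lines shown in the diff are actually part of the change; the `import gradio as gr` line, the `gr.Interface(...)` arguments, and the `launch()` call below are assumptions added for illustration, since the hunk ends at the opening `gr.Interface(` call and no gradio import is visible in it.

# Illustrative reconstruction only; the diff above is authoritative.
import gradio as gr  # assumed: no gradio import appears in the new hunk
from gpt4all import GPT4All

# Specify the local path to the downloaded model file
model_path = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

# Initialize the model using the local path
model = GPT4All(model_path)

def generate_text(prompt):
    # GPT4All.generate returns the completion as a plain string
    result = model.generate(prompt)
    return result

# Interface arguments below are assumed; only the opening call is in the diff
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Generated text"),
    title="GPT4All text generation",
)

if __name__ == "__main__":
    text_generation_interface.launch()

Net effect of the commit: the commented-out transformers/QLoRA experiment is dropped, and the GPT4All model is now constructed from an explicit local path variable instead of having the filename passed inline.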