FlawedLLM committed
Commit b6eb92e · verified · 1 Parent(s): 4daf913

Update app.py

Files changed (1):
  1. app.py +0 -175
app.py CHANGED
@@ -1,177 +1,9 @@
- # import torch; torch.version.cuda
- # # from huggingface_hub import login, HfFolder
- # import subprocess
- # # import getpass
-
- # # def run_sudo_command(cmd):
- # # try:
- # # password = getpass.getpass(prompt="Enter your sudo password: ") # Securely get the password
- # # result = subprocess.run(["sudo", "-S"] + cmd, input=password.encode(), capture_output=True, text=True, check=True)
- # # print(result.stdout)
- # # except subprocess.CalledProcessError as e:
- # # print(f"Error executing command: {e.stderr}")
-
- # # # Run the ldconfig command
- # # run_sudo_command(["ldconfig", "/usr/lib64-nvidia"])
-
- # def run_command(cmd, shell=False):
- # """Runs a shell command and prints the output."""
- # try:
- # result = subprocess.run(cmd, shell=shell, capture_output=True, text=True, check=True)
- # print(result.stdout)
- # except subprocess.CalledProcessError as e:
- # print(f"Error executing command: {e.stderr}")
- # subprocess.run(["pip", "install", "--upgrade", "pip"], check=True)
- # # subprocess.run(["pip", "install", "--upgrade", "torch"], check=True)
- # # subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
- # # Pip install command as a list
- # pip_command = [
- # "pip",
- # "install",
- # "--upgrade",
- # "--force-reinstall",
- # "--no-cache-dir",
- # "torch==2.1.1",
- # "triton",
- # "--index-url",
- # "https://download.pytorch.org/whl/cu121"
- # ]
- # run_command(pip_command)
- # run_command(["pip", "install", "--no-deps", "trl", "peft", "accelerate", "bitsandbytes"])
- # # subprocess.run(["pip", "install", "--upgrade", "peft"], check=True)
- # subprocess.run(["pip", "install", "xformers"], check=True)
- # # subprocess.run(["pip", "install", "--upgrade", "accelerate"], check=True)
- # subprocess.run(["unsloth[cu121-ampere-torch211] @ git+https://github.com/unslothai/unsloth.git"], check=True)
- # import subprocess
-
-
-
- # # 1. Create the conda environment
- # run_command(["conda", "create", "-y", "--name", "unsloth_env", "python=3.10"])
-
- # # 2. Activate the environment (Note: Requires shell=True)
- # run_command("conda activate unsloth_env", shell=True)
-
- # # 3. Install PyTorch and related packages with conda
- # run_command("conda install pytorch-cuda=<12.1/11.8> pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers")
-
- # # 4. Install unsloth from the GitHub repository with pip
- # run_command("pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"")
-
- # # 5. Install additional pip packages without dependencies
- # run_command("pip install --no-deps trl peft accelerate bitsandbytes")
- # import subprocess
-
- # def run_command(cmd):
- # try:
- # result = subprocess.run(cmd, capture_output=True, text=True, check=True)
- # print(result.stdout)
- # except subprocess.CalledProcessError as e:
- # print(f"Error executing command: {e.stderr}")
-
- # # Pip install xformers
- # run_command([
- # "pip",
- # "install",
- # "-U",
- # "xformers<0.0.26",
- # "--index-url",
- # "https://download.pytorch.org/whl/cu121"
- # ])
-
- # # Pip install unsloth from GitHub
- # run_command([
- # "pip",
- # "install",
- # "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
- # ])
-
  import os
  HF_TOKEN = os.environ["HF_TOKEN"]
  import re
  import spaces
  import gradio as gr
  import torch
- # from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
- # from peft import PeftModel, PeftConfig
-
-
- # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_00")
- # quantization_config = BitsAndBytesConfig(
- # load_in_4bit=True,
- # bnb_4bit_use_double_quant=True,
- # bnb_4bit_quant_type="nf4",
- # bnb_4bit_compute_dtype=torch.float16)
- # config=AutoConfig.from_pretrained("FlawedLLM/Bhashini_00")
- # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_00",
- # device_map="auto",
- # quantization_config=quantization_config,
- # torch_dtype =torch.float16,
- # low_cpu_mem_usage=True,
- # use_safetensors=True,
- # )
-
- # # Assuming you have your HF repository in this format: "your_username/your_model_name"
- # model_id = "FlawedLLM/BhashiniLLM"
-
- # # Load the base model (the one you fine-tuned with LoRA)
- # base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto') # Load in 8-bit for efficiency
- # for param in base_model.parameters():
- # param.data = param.data.to(torch.float16) # or torch.float32
-
- # # Load the LoRA adapter weights
- # model = PeftModel.from_pretrained(base_model, model_id)
- # tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-
-
- # model = AutoModel.from_pretrained("FlawedLLM/Bhashini", load_in_4bit=True, device_map='auto')
- # I highly do NOT suggest - use Unsloth if possible
- # from peft import AutoPeftModelForCausalLM
- # from transformers import AutoTokenizer
- # model = AutoPeftModelForCausalLM.from_pretrained(
- # "FlawedLLM/Bhashini", # YOUR MODEL YOU USED FOR TRAINING
- # load_in_4bit = True,
- # )
- # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini")
- # # Load model directly
- # from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
-
- # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_9")
- # config = AutoConfig.from_pretrained("FlawedLLM/Bhashini_9") # Load configuration
-
- # # quantization_config = BitsAndBytesConfig(
- # # load_in_4bit=True,
- # # bnb_4bit_use_double_quant=True,
- # # bnb_4bit_quant_type="nf4",
- # # bnb_4bit_compute_dtype=torch.float16
- # # )
-
- # # torch_dtype =torch.float16
- # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9",config=config, ignore_mismatched_sizes=True).to('cuda')
- # Load model directly
-
- # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini89", trust_remote_code=True)
- # quantization_config = BitsAndBytesConfig(
- # load_in_4bit=True,
- # bnb_4bit_use_double_quant=True,
- # bnb_4bit_quant_type="nf4",
- # bnb_4bit_compute_dtype=torch.float16)
- # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini89",
- # device_map="auto",
- # quantization_config=quantization_config,
- # torch_dtype =torch.float16,
- # low_cpu_mem_usage=True,
- # use_safetensors=True,
- # trust_remote_code=True)
- # from unsloth import FastLanguageModel
- # model, tokenizer = FastLanguageModel.from_pretrained(
- # model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final", # YOUR MODEL YOU USED FOR TRAINING
- # max_seq_length = max_seq_length,
- # dtype = dtype,
- # load_in_4bit = load_in_4bit,)
- # FastLanguageModel.for_inference(model) # Enable native 2x faster inference
- # Load model directly
  from transformers import AutoTokenizer, AutoModelForCausalLM

  tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
@@ -255,13 +87,6 @@ def chunk_it(input_command, item_list):
  return reply


- # iface=gr.Interface(fn=chunk_it,
- # inputs="text",
- # inputs="text",
- # outputs="text",
- # title="Formatter_Pro",
- # )
-

  iface = gr.Interface(
  fn=chunk_it,