Cylanoid committed (verified)
Commit b5fd96f · 1 Parent(s): 13fbf94

Update app.py

Files changed (1)
  1. app.py +9 -6
app.py CHANGED
@@ -12,9 +12,10 @@ from accelerate import Accelerator
 import bitsandbytes
 import sentencepiece
 import huggingface_hub
+from transformers import TrainingArguments, Trainer
 
 # Retrieve HF_TOKEN from Hugging Face Space secrets
-HF_TOKEN = os.getenv("HF_TOKEN")
+HF_TOKEN = os.getenv("HF_TOKEN:levi put token here")  # Token expected as env variable 'HF_TOKEN'
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN not found in environment variables. Please set it in Hugging Face Space secrets under 'Settings' > 'Secrets'.")
 
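Note that os.getenv looks up the literal string it is given, so the added line only finds a value if an environment variable with exactly that name exists; the inline comment says the token is expected under HF_TOKEN. A minimal sketch of the retrieval-and-login flow that comment describes, with the login call taken from the next hunk's context:

import os
import huggingface_hub

# Read the token from the Space secret named exactly "HF_TOKEN"
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables.")

# Authenticate against the Hub so the gated Llama-2 weights can be downloaded
huggingface_hub.login(token=HF_TOKEN)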
 
@@ -25,11 +26,6 @@ huggingface_hub.login(token=HF_TOKEN)
 MODEL_ID = "meta-llama/Llama-2-7b-hf"
 tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
-# Add padding token if it doesn't exist
-if tokenizer.pad_token is None:
-    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-    model.resize_token_embeddings(len(tokenizer))
-
 # Check CUDA and enable Flash Attention if supported
 use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
 model = LlamaForCausalLM.from_pretrained(
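The capability check above gates Flash Attention on GPUs with CUDA compute capability 8.0 or higher (Ampere and newer). How the flag is consumed is outside the visible lines; a hedged sketch of one common pattern, where attn_implementation is an assumption rather than something shown in the diff:

import torch

# Flash Attention 2 needs an Ampere-or-newer GPU (compute capability >= 8)
use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Hypothetical: forward the flag to from_pretrained; the real call's
# arguments are not shown in this hunk.
model_kwargs = {"attn_implementation": "flash_attention_2"} if use_flash_attention else {}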
@@ -40,6 +36,11 @@ model = LlamaForCausalLM.from_pretrained(
     load_in_8bit=True
 )
 
+# Add padding token if it doesn't exist and resize embeddings
+if tokenizer.pad_token is None:
+    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+    model.resize_token_embeddings(len(tokenizer))
+
 # Prepare model for LoRA training
 model = prepare_model_for_kbit_training(model)
 peft_config = LoraConfig(
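The substantive change in this commit is that the pad-token block now runs after the model has been created, so model.resize_token_embeddings has an object to act on. A minimal sketch of the resulting order, assuming device_map and the LoraConfig values, which are not visible in the diff:

from transformers import LlamaTokenizer, LlamaForCausalLM
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

MODEL_ID = "meta-llama/Llama-2-7b-hf"
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",   # assumption: not shown in the hunk
    load_in_8bit=True,
)

# Pad token is added only after the model exists, then the embedding matrix is resized
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# Prepare the 8-bit model for LoRA fine-tuning; these LoraConfig values are placeholders
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
model = get_peft_model(model, peft_config)  # assumption: application step is outside the hunk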
@@ -58,6 +59,7 @@ def train_ui(files):
     try:
         # Process multiple PDFs or JSON
         raw_text = ""
+        dataset = None  # Initialize dataset as None
         for file in files:
             if file.name.endswith(".pdf"):
                 with pdfplumber.open(file.name) as pdf:
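The added dataset = None gives the function a defined value to test later when no JSON file supplies a ready-made dataset. A hedged sketch of how this loop plausibly continues; the page-extraction and JSON branches are assumptions, since only the first lines of the loop appear in the hunk:

import json
import pdfplumber

def train_ui(files):
    try:
        # Process multiple PDFs or JSON
        raw_text = ""
        dataset = None  # defined up front so a later check can tell whether JSON provided a dataset
        for file in files:
            if file.name.endswith(".pdf"):
                with pdfplumber.open(file.name) as pdf:
                    # Hypothetical: concatenate the text of every page
                    for page in pdf.pages:
                        raw_text += page.extract_text() or ""
            elif file.name.endswith(".json"):
                # Hypothetical: JSON uploads are treated as a prebuilt dataset
                with open(file.name) as f:
                    dataset = json.load(f)
        # ... tokenization and training continue here (not shown in the diff)
    except Exception as e:
        return f"Error: {e}"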
@@ -152,4 +154,5 @@ with gr.Blocks(title="Healthcare Fraud Detection Fine-Tuning") as demo:
     output = gr.Textbox(label="Training Status", lines=5)
     train_button.click(fn=train_ui, inputs=file_input, outputs=output)
 
+# Launch the Gradio app
 demo.launch()
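file_input and train_button are defined above the visible lines; a minimal sketch of how the wiring in this hunk typically fits together, with those two components and the stub handler assumed:

import gradio as gr

def train_ui(files):  # stub standing in for the real handler defined earlier in app.py
    return "Training started"

with gr.Blocks(title="Healthcare Fraud Detection Fine-Tuning") as demo:
    # Assumed components; only the last two lines of the block appear in the hunk
    file_input = gr.File(label="Upload PDFs or JSON", file_count="multiple")
    train_button = gr.Button("Start Fine-Tuning")
    output = gr.Textbox(label="Training Status", lines=5)
    train_button.click(fn=train_ui, inputs=file_input, outputs=output)

# Launch the Gradio app
demo.launch()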
 
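The commit also imports TrainingArguments and Trainer at the top of the file; their use sits outside the visible hunks, so the following is only a sketch of the usual pairing, with every value a placeholder:

from transformers import TrainingArguments, Trainer

def run_training(model, train_dataset):
    # Placeholder hyperparameters; the real configuration is outside the hunks shown above
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        logging_steps=10,
    )
    trainer = Trainer(
        model=model,                  # the LoRA-prepared model
        args=training_args,
        train_dataset=train_dataset,  # hypothetical: built from the uploaded PDFs/JSON
    )
    trainer.train()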