Faizal2805 committed (verified)
Commit 0b103dc · Parent(s): 301f745

Update app.py

Files changed (1): app.py (+32 -10)
app.py CHANGED
@@ -1,8 +1,10 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+# Initialize Hugging Face Inference Client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+# Response Function (Now Compatible with 'type=messages')
 def respond(
     message,
     history,
@@ -11,17 +13,24 @@ def respond(
     temperature,
     top_p,
 ):
+    # Correct message format for Gradio's 'messages' type
     messages = [{"role": "system", "content": system_message}]
 
-    # Updated for OpenAI-style format (replacing tuples)
+    # Handle both old tuple format and new 'messages' format
     for entry in history:
-        role = "user" if entry["role"] == "user" else "assistant"
-        messages.append({"role": role, "content": entry["content"]})
+        if isinstance(entry, dict) and "role" in entry and "content" in entry:
+            messages.append(entry)  # Already in correct format
+        elif isinstance(entry, tuple) and len(entry) == 2:
+            messages.append({"role": "user", "content": entry[0]})
+            messages.append({"role": "assistant", "content": entry[1]})
 
+    # Add the current user message
     messages.append({"role": "user", "content": message})
 
+    # Initialize response string
     response = ""
 
+    # Generate chat response using the client
    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -33,10 +42,10 @@ def respond(
         response += token
         yield response
 
-# Updated ChatInterface with correct type
+# Gradio Interface Setup
 demo = gr.ChatInterface(
     respond,
-    chatbot=gr.Chatbot(type="messages"),  # Correct format
+    chatbot=gr.Chatbot(type="messages"),
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -45,24 +54,26 @@ demo = gr.ChatInterface(
     ],
 )
 
+# Launch the Gradio app
 if __name__ == "__main__":
     demo.launch()
 
-# -----------------------------------------------
-# Fine-Tuning GPT-2 on Hugging Face Spaces (Improved Section)
-# -----------------------------------------------
+# Fine-Tuning GPT-2 on Hugging Face Spaces (Streaming 40GB Dataset, No Storage Issues)
 from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
 from datasets import load_dataset
 from peft import LoraConfig, get_peft_model
 import torch
 
+# Authenticate Hugging Face
 from huggingface_hub import notebook_login
 notebook_login()
 
+# Load GPT-2 model and tokenizer
 model_name = "gpt2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
+# Custom Dataset (Predefined Q&A Pairs for Project Expo)
 custom_data = [
     {"prompt": "Who are you?", "response": "I am Eva, a virtual voice assistant."},
     {"prompt": "What is your name?", "response": "I am Eva, how can I help you?"},
@@ -73,22 +84,28 @@ custom_data = [
     {"prompt": "Who created you?", "response": "I was created by an expert team specializing in AI fine-tuning and web development."}
 ]
 
+# Convert custom dataset to Hugging Face Dataset
 dataset_custom = load_dataset("json", data_files={"train": custom_data})
+
+# Load OpenWebText dataset (5% portion to avoid streaming issues)
 dataset = load_dataset("Skylion007/openwebtext", split="train[:20%]")
 
+# Tokenization function
 def tokenize_function(examples):
     return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
 
 tokenized_datasets = dataset.map(tokenize_function, batched=True)
 
+# Apply LoRA for efficient fine-tuning
 lora_config = LoraConfig(
     r=8, lora_alpha=32, lora_dropout=0.05, bias="none",
-    target_modules=["c_attn", "c_proj"]
+    target_modules=["c_attn", "c_proj"]  # Apply LoRA to attention layers
 )
 
 model = get_peft_model(model, lora_config)
-model.gradient_checkpointing_enable()
+model.gradient_checkpointing_enable()  # Enable checkpointing for memory efficiency
 
+# Training arguments
 training_args = TrainingArguments(
     output_dir="gpt2_finetuned",
     auto_find_batch_size=True,
@@ -101,17 +118,22 @@ training_args = TrainingArguments(
     push_to_hub=True
 )
 
+# Trainer setup
 trainer = Trainer(
     model=model,
     args=training_args,
     train_dataset=tokenized_datasets
 )
 
+# Start fine-tuning
 trainer.train()
+
+# Save and push the model to Hugging Face Hub
 trainer.save_model("gpt2_finetuned")
 tokenizer.save_pretrained("gpt2_finetuned")
 trainer.push_to_hub()
 
+# Deploy as Gradio Interface
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
     outputs = model.generate(**inputs, max_length=100)
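A note for readers following the hunks: the diff elides the middle of the streaming loop (old lines 28-32 / new lines 37-41). In the stock Hugging Face ChatInterface template this Space appears to be built from, that gap carries the sampling arguments and the per-chunk token extraction. A minimal sketch assuming that template; stream=True and the delta.content access are assumptions, not lines shown in this diff:

# Sketch of the elided streaming span, assuming the stock template.
for message in client.chat_completion(
    messages,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
):
    token = message.choices[0].delta.content  # shadows the outer 'message' arg, as the template does
    response += token
    yield response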
 
 
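Two details the training half relies on implicitly: GPT-2's tokenizer ships without a pad token, so padding="max_length" raises until one is assigned, and Trainer needs labels to compute a causal-LM loss from the tokenized batches. A hedged sketch of both fixes:

# Sketch: pad-token and label handling the script would likely need.
from transformers import AutoTokenizer, DataCollatorForLanguageModeling

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 defines no pad token by default

# mlm=False makes the collator copy input_ids into labels at batch time,
# which is what a causal-LM Trainer needs to compute a loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
# Then pass data_collator=data_collator when constructing the Trainer.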
 
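The diff cuts off inside generate_response, before anything is returned. One plausible completion, decoding the generated ids and exposing the function through a plain gr.Interface; both the decode step and the interface wiring are assumptions, not committed code:

# Sketch: a hypothetical completion of the truncated function.
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

gr.Interface(fn=generate_response, inputs="text", outputs="text").launch()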