Faizal2805 committed on
Commit
d402657
·
verified ·
1 Parent(s): 2e415e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -16
app.py CHANGED
@@ -6,7 +6,6 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
6
 
7
  # Response Function
8
  def respond(message, history, system_message, max_tokens, temperature, top_p):
9
- # Ensure correct message structure
10
  messages = [{"role": "system", "content": system_message}]
11
 
12
  if isinstance(history, list):
@@ -17,13 +16,10 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
17
  messages.append({"role": "user", "content": entry[0]})
18
  messages.append({"role": "assistant", "content": entry[1]})
19
 
20
- # Append user message
21
  messages.append({"role": "user", "content": message})
22
 
23
- # Initialize response
24
  response = ""
25
 
26
- # Generate response
27
  for message in client.chat_completion(
28
  messages,
29
  max_tokens=max_tokens,
@@ -35,11 +31,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
35
  response += token
36
  yield response
37
 
38
-
39
  # Gradio Chat Interface
40
  demo = gr.ChatInterface(
41
  respond,
42
- chatbot=gr.Chatbot(type="messages"),
43
  additional_inputs=[
44
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
45
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -54,10 +48,6 @@ from datasets import Dataset
54
  from peft import LoraConfig, get_peft_model
55
  import torch
56
 
57
- # Authenticate Hugging Face
58
- from huggingface_hub import notebook_login
59
- notebook_login()
60
-
61
  # Load GPT-2 model and tokenizer
62
  model_name = "gpt2"
63
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -71,17 +61,16 @@ custom_data = [
71
  ]
72
 
73
  # Convert custom dataset to Hugging Face Dataset
74
- dataset_custom = Dataset.from_dict({"text": [d['text'] for d in custom_data],
75
- "label": [d['label'] for d in custom_data]})
76
-
77
- # Load OpenWebText dataset (5% portion)
78
- dataset = dataset_custom.train_test_split(test_size=0.2)['train']
79
 
80
  # Tokenization function
81
  def tokenize_function(examples):
82
  return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
83
 
84
- tokenized_datasets = dataset.map(tokenize_function, batched=True)
85
 
86
  # Apply LoRA for efficient fine-tuning
87
  lora_config = LoraConfig(
 
6
 
7
  # Response Function
8
  def respond(message, history, system_message, max_tokens, temperature, top_p):
 
9
  messages = [{"role": "system", "content": system_message}]
10
 
11
  if isinstance(history, list):
 
16
  messages.append({"role": "user", "content": entry[0]})
17
  messages.append({"role": "assistant", "content": entry[1]})
18
 
 
19
  messages.append({"role": "user", "content": message})
20
 
 
21
  response = ""
22
 
 
23
  for message in client.chat_completion(
24
  messages,
25
  max_tokens=max_tokens,
 
31
  response += token
32
  yield response
33
 
 
34
  # Gradio Chat Interface
35
  demo = gr.ChatInterface(
36
  respond,
 
37
  additional_inputs=[
38
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
39
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 
48
  from peft import LoraConfig, get_peft_model
49
  import torch
50
 
 
 
 
 
51
  # Load GPT-2 model and tokenizer
52
  model_name = "gpt2"
53
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
61
  ]
62
 
63
  # Convert custom dataset to Hugging Face Dataset
64
+ dataset_custom = Dataset.from_dict({
65
+ "text": [d['text'] for d in custom_data],
66
+ "label": [d['label'] for d in custom_data]
67
+ })
 
68
 
69
  # Tokenization function
70
  def tokenize_function(examples):
71
  return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
72
 
73
+ tokenized_datasets = dataset_custom.map(tokenize_function, batched=True)
74
 
75
  # Apply LoRA for efficient fine-tuning
76
  lora_config = LoraConfig(