Spaces:

Faizal2805
/

expo

Sleeping

App Files Files Community

Faizal2805 committed on Mar 13

Commit

d402657

verified ·

1 Parent(s): 2e415e2

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -16

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # Response Function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Ensure correct message structure
     messages = [{"role": "system", "content": system_message}]
     if isinstance(history, list):
@@ -17,13 +16,10 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
                 messages.append({"role": "user", "content": entry[0]})
                 messages.append({"role": "assistant", "content": entry[1]})
-    # Append user message
     messages.append({"role": "user", "content": message})
-    # Initialize response
     response = ""
-    # Generate response
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -35,11 +31,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         response += token
         yield response
 # Gradio Chat Interface
 demo = gr.ChatInterface(
     respond,
-    chatbot=gr.Chatbot(type="messages"),
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -54,10 +48,6 @@ from datasets import Dataset
 from peft import LoraConfig, get_peft_model
 import torch
-# Authenticate Hugging Face
-from huggingface_hub import notebook_login
-notebook_login()
 # Load GPT-2 model and tokenizer
 model_name = "gpt2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -71,17 +61,16 @@ custom_data = [
 ]
 # Convert custom dataset to Hugging Face Dataset
-dataset_custom = Dataset.from_dict({"text": [d['text'] for d in custom_data],
-                                    "label": [d['label'] for d in custom_data]})
-# Load OpenWebText dataset (5% portion)
-dataset = dataset_custom.train_test_split(test_size=0.2)['train']
 # Tokenization function
 def tokenize_function(examples):
     """Tokenize the "text" entry of ``examples`` with the module-level tokenizer.

     The tokenizer is called with ``truncation=True``, ``padding="max_length"``
     and ``max_length=512``.

     Args:
         examples: Mapping that provides a "text" entry. Presumably a batch of
             strings, since this function is handed to a ``.map(...,
             batched=True)`` call -- confirm against the dataset schema.

     Returns:
         The result of the ``tokenizer(...)`` call, unchanged.
     """
     return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
-tokenized_datasets = dataset.map(tokenize_function, batched=True)
 # Apply LoRA for efficient fine-tuning
 lora_config = LoraConfig(

 # Response Function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
     if isinstance(history, list):
                 messages.append({"role": "user", "content": entry[0]})
                 messages.append({"role": "assistant", "content": entry[1]})
     messages.append({"role": "user", "content": message})
     response = ""
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         response += token
         yield response
 # Gradio Chat Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 from peft import LoraConfig, get_peft_model
 import torch
 # Load GPT-2 model and tokenizer
 model_name = "gpt2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 ]
 # Convert custom dataset to Hugging Face Dataset
+dataset_custom = Dataset.from_dict({
+    "text": [d['text'] for d in custom_data],
+    "label": [d['label'] for d in custom_data]
+})
 # Tokenization function
 def tokenize_function(examples):
     """Tokenize the "text" entry of ``examples`` with the module-level tokenizer.

     The tokenizer is called with ``truncation=True``, ``padding="max_length"``
     and ``max_length=512``.

     Args:
         examples: Mapping that provides a "text" entry. Presumably a batch of
             strings, since this function is handed to a ``.map(...,
             batched=True)`` call -- confirm against the dataset schema.

     Returns:
         The result of the ``tokenizer(...)`` call, unchanged.
     """
     return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
+tokenized_datasets = dataset_custom.map(tokenize_function, batched=True)
 # Apply LoRA for efficient fine-tuning
 lora_config = LoraConfig(