Faizal2805 committed on
Commit
c79c478
·
verified ·
1 Parent(s): 52f6625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -0
app.py CHANGED
@@ -83,6 +83,22 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
83
  model = AutoModelForCausalLM.from_pretrained(model_name)
84
 
85
  # Load a 5% slice of the OpenWebText training split (not streamed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  dataset = load_dataset("Skylion007/openwebtext", split="train[:5%]") # Load 5% to avoid streaming issues
87
 
88
  # Tokenization function
 
83
  model = AutoModelForCausalLM.from_pretrained(model_name)
84
 
85
  # Load a 5% slice of the OpenWebText training split (not streamed)
86
+
87
# Custom dataset: predefined Q&A pairs for the project expo.
# Each record holds one "prompt" string and the "response" the assistant
# should give for it.
custom_data = [
    {"prompt": "Who are you?", "response": "I am Eva, a virtual voice assistant."},
    {"prompt": "What is your name?", "response": "I am Eva, how can I help you?"},
    {"prompt": "What can you do?", "response": "I can assist with answering questions, searching the web, and much more!"},
    {"prompt": "Who invented the computer?", "response": "Charles Babbage is known as the father of the computer."},
    {"prompt": "Tell me a joke.", "response": "Why don’t scientists trust atoms? Because they make up everything!"},
    {"prompt": "Who is the Prime Minister of India?", "response": "The current Prime Minister of India is Narendra Modi."},
    {"prompt": "Who created you?", "response": "I was created by an expert team specializing in AI fine-tuning and web development."},
    {"prompt": "Can you introduce yourself?", "response": "I am Eva, your AI assistant, designed to assist and provide information."},
]

# Convert the in-memory records to a Hugging Face dataset.
# `load_dataset("json", data_files=...)` expects file paths or URLs, not
# Python objects, so the records are wrapped with `Dataset.from_list` instead.
# Imported here so this section stays self-contained; move it to the top-level
# import block if preferred.
from datasets import Dataset, DatasetDict

# A `data_files` mapping with no `split` argument yields a DatasetDict keyed by
# split name; keep that shape so `dataset_custom["train"]` works as it would
# have with the original call.
dataset_custom = DatasetDict({"train": Dataset.from_list(custom_data)})

# NOTE(review): nothing in this section merges `dataset_custom` into the
# OpenWebText data. The two have different columns ("prompt"/"response" here),
# so any merge must first map them onto a common schema — confirm downstream.
dataset = load_dataset("Skylion007/openwebtext", split="train[:5%]")  # Load 5% to avoid streaming issues
103
 
104
  # Tokenization function