Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -83,6 +83,22 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
83 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
84 |
|
85 |
# Load the OpenWebText dataset using streaming (No download required)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
dataset = load_dataset("Skylion007/openwebtext", split="train[:5%]") # Load 5% to avoid streaming issues
|
87 |
|
88 |
# Tokenization function
|
|
|
83 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
84 |
|
85 |
# Load the OpenWebText dataset using streaming (No download required)
|
86 |
+
|
87 |
+
# Custom Dataset (Predefined Q&A Pairs for Project Expo)
|
88 |
+
custom_data = [
|
89 |
+
{"prompt": "Who are you?", "response": "I am Eva, a virtual voice assistant."},
|
90 |
+
{"prompt": "What is your name?", "response": "I am Eva, how can I help you?"},
|
91 |
+
{"prompt": "What can you do?", "response": "I can assist with answering questions, searching the web, and much more!"},
|
92 |
+
{"prompt": "Who invented the computer?", "response": "Charles Babbage is known as the father of the computer."},
|
93 |
+
{"prompt": "Tell me a joke.", "response": "Why don’t scientists trust atoms? Because they make up everything!"},
|
94 |
+
{"prompt": "Who is the Prime Minister of India?", "response": "The current Prime Minister of India is Narendra Modi."},
|
95 |
+
{"prompt": "Who created you?", "response": "I was created by an expert team specializing in AI fine-tuning and web development."}, {"prompt": "Can you introduce yourself?", "response": "I am Eva, your AI assistant, designed to assist and provide information."}
|
96 |
+
]
|
97 |
+
|
98 |
+
# Convert custom dataset to Hugging Face Dataset
|
99 |
+
dataset_custom = load_dataset("json", data_files={"train": custom_data})
|
100 |
+
|
101 |
+
# Merge with OpenWebText dataset
|
102 |
dataset = load_dataset("Skylion007/openwebtext", split="train[:5%]") # Load 5% to avoid streaming issues
|
103 |
|
104 |
# Tokenization function
|