Spaces:
Sleeping
Sleeping
File size: 1,219 Bytes
a36cb22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import datasets
from datasets import load_dataset
import config
def download(mode):
    """Fetch one split of the dataset named by ``config.DATASET``.

    Args:
        mode: Value forwarded as the ``split`` argument of ``load_dataset``.

    Returns:
        Whatever ``load_dataset`` returns for that split.
    """
    print("Downloading Dataset - ", config.DATASET, "...")
    return load_dataset(config.DATASET, split=mode)
def prepare_prompts_responses(dataset):
    """Pair each selected assistant reply with the prompt it answers.

    The dataset is converted to a pandas DataFrame. Assistant rows whose
    ``rank`` equals 0.0 are kept, and each one is joined with the ``text`` of
    the prompter row whose ``message_id`` matches its ``parent_id``. The
    combined string has the form
    ``"### Human: <prompt> ### Assistant: <reply>"``.

    Args:
        dataset: Object exposing ``to_pandas()``; the resulting frame must
            contain the columns ``role``, ``message_id``, ``parent_id``,
            ``rank`` and ``text``.

    Returns:
        A DataFrame holding the selected assistant rows plus one extra column,
        named by ``config.DATASET_TEXT_FIELD``, with the combined strings.

    Raises:
        KeyError: If an assistant row's ``parent_id`` does not match the
            ``message_id`` of any prompter row.
    """
    print("Preparing Prompt and Assistant....")
    dataset_df = dataset.to_pandas()

    # Prompt text keyed by message_id, so replies can look up their parent.
    is_prompter = dataset_df["role"] == "prompter"
    prompt_texts = dataset_df.loc[is_prompter].set_index("message_id")["text"]

    is_selected_reply = (dataset_df["role"] == "assistant") & (dataset_df["rank"] == 0.0)
    # Explicit copy: a column is added below, and assigning onto a filtered
    # selection of ``dataset_df`` triggers pandas' SettingWithCopyWarning.
    assistants = dataset_df.loc[is_selected_reply].copy()

    # Walk only the two columns that are needed instead of iterrows(), which
    # materialises a full Series for every row.
    assistants[config.DATASET_TEXT_FIELD] = [
        "### Human: " + prompt_texts.loc[parent_id] + " ### Assistant: " + reply
        for parent_id, reply in zip(assistants["parent_id"], assistants["text"])
    ]
    return assistants
def preparedata(mode):
    """Download a split and return it as a prompt/response ``Dataset``.

    Args:
        mode: Split identifier passed through to ``download``.

    Returns:
        A ``datasets.Dataset`` built from the DataFrame produced by
        ``prepare_prompts_responses``.
    """
    print("Preparing data for - ", mode, "...")
    raw_split = download(mode=mode)
    paired_frame = prepare_prompts_responses(raw_split)
    # NOTE(review): the frame is a filtered subset, so its index is presumably
    # non-default; confirm whether from_pandas should keep it as a column.
    return datasets.Dataset.from_pandas(paired_frame)
|