Robin-7b

Runtime error

App Files Files Community

Robin-7b / lmflow /utils /constants.py

NingKanae

Duplicate from OptimalScale/Robin-7b

98f2419 about 2 years ago

raw

history blame contribute delete

3.29 kB

	#!/usr/bin/env python
	# coding=utf-8
	"""
	Commonly used constants.
	"""

	# Short format summary for the "text_only" dataset type.
	# The backslash right after the opening quotes suppresses the literal's
	# leading newline, so the text starts directly with the first content line.
	TEXT_ONLY_DATASET_DESCRIPTION = """\
	"text_only": a dataset with only raw text instances, with following format:

	{
	"type": "text_only",
	"instances": [
	{ "text": "TEXT_1" },
	{ "text": "TEXT_2" },
	...
	]
	}
	"""


	# Usage examples for the "text_only" dataset type, written as Markdown
	# with fenced Python snippets. The leading newline that follows the
	# opening quotes is stripped so the text starts at "For example,".
	#
	# Fixes relative to the previous text:
	#   * the second snippet imported `DatasetArguments` but instantiated the
	#     non-existent name `DatasetArgument`;
	#   * the body of the `with open(...)` statement was not indented, which
	#     made the snippet a syntax error when copied.
	TEXT_ONLY_DATASET_DETAILS = (
	"""
	For example,

	```python
	from lmflow.datasets import Dataset

	data_dict = {
	"type": "text_only",
	"instances": [
	{ "text": "Human: Hello. Bot: Hi!" },
	{ "text": "Human: How are you today? Bot: Fine, thank you!" },
	]
	}
	dataset = Dataset.create_from_dict(data_dict)
	```

	You may also save the corresponding format to json,
	```python
	import json
	from lmflow.args import DatasetArguments
	from lmflow.datasets import Dataset

	data_dict = {
	"type": "text_only",
	"instances": [
	{ "text": "Human: Hello. Bot: Hi!" },
	{ "text": "Human: How are you today? Bot: Fine, thank you!" },
	]
	}
	with open("data.json", "w") as fout:
	    json.dump(data_dict, fout)

	data_args = DatasetArguments(dataset_path="data.json")
	dataset = Dataset(data_args)
	new_data_dict = dataset.to_dict()
	# `new_data_dict` Should have the same content as `data_dict`
	```
	"""
	).lstrip("\n")


	# Short format summary for the "text2text" dataset type.
	# The backslash right after the opening quotes suppresses the literal's
	# leading newline, so the text starts directly with the first content line.
	TEXT2TEXT_DATASET_DESCRIPTION = """\
	"text2text": a dataset with input & output instances, with following format:

	{
	"type": "text2text",
	"instances": [
	{ "input": "INPUT_1", "output": "OUTPUT_1" },
	{ "input": "INPUT_2", "output": "OUTPUT_2" },
	...
	]
	}
	"""


	# Usage examples for the "text2text" dataset type, written as Markdown
	# with fenced Python snippets. The leading newline that follows the
	# opening quotes is stripped so the text starts at "For example,".
	#
	# Fixes relative to the previous text:
	#   * the second snippet imported `DatasetArguments` but instantiated the
	#     non-existent name `DatasetArgument`;
	#   * the body of the `with open(...)` statement was not indented, which
	#     made the snippet a syntax error when copied.
	TEXT2TEXT_DATASET_DETAILS = (
	"""
	For example,

	```python
	from lmflow.datasets import Dataset

	data_dict = {
	"type": "text2text",
	"instances": [
	{
	"input": "Human: Hello.",
	"output": "Bot: Hi!",
	},
	{
	"input": "Human: How are you today?",
	"output": "Bot: Fine, thank you! And you?",
	}
	]
	}
	dataset = Dataset.create_from_dict(data_dict)
	```

	You may also save the corresponding format to json,
	```python
	import json
	from lmflow.args import DatasetArguments
	from lmflow.datasets import Dataset

	data_dict = {
	"type": "text2text",
	"instances": [
	{
	"input": "Human: Hello.",
	"output": "Bot: Hi!",
	},
	{
	"input": "Human: How are you today?",
	"output": "Bot: Fine, thank you! And you?",
	}
	]
	}
	with open("data.json", "w") as fout:
	    json.dump(data_dict, fout)

	data_args = DatasetArguments(dataset_path="data.json")
	dataset = Dataset(data_args)
	new_data_dict = dataset.to_dict()
	# `new_data_dict` Should have the same content as `data_dict`
	```
	"""
	).lstrip("\n")


	# Full "text_only" documentation: the short format summary immediately
	# followed by the usage examples.
	TEXT_ONLY_DATASET_LONG_DESCRIPTION = (
	TEXT_ONLY_DATASET_DESCRIPTION + TEXT_ONLY_DATASET_DETAILS
	)

	# Backward-compatible alias: the original (misspelled) public name is kept
	# so existing references to it continue to work.
	TEXT_ONLY_DATASET_LONG_DESCRITION = TEXT_ONLY_DATASET_LONG_DESCRIPTION

	# Full "text2text" documentation: the short format summary immediately
	# followed by the usage examples.
	TEXT2TEXT_DATASET_LONG_DESCRIPTION = (
	TEXT2TEXT_DATASET_DESCRIPTION + TEXT2TEXT_DATASET_DETAILS
	)

	# Backward-compatible alias: the original (misspelled) public name is kept
	# so existing references to it continue to work.
	TEXT2TEXT_DATASET_LONG_DESCRITION = TEXT2TEXT_DATASET_LONG_DESCRIPTION