|
datasets: |
|
- namespace: lilac |
|
name: Capybara |
|
source: |
|
dataset_name: LDJnr/Capybara |
|
source_name: huggingface |
|
embeddings: |
|
- path: |
|
- conversation |
|
- '*' |
|
- input |
|
embedding: gte-small |
|
- path: |
|
- conversation |
|
- '*' |
|
- output |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversation |
|
- '*' |
|
- input |
|
- - conversation |
|
- '*' |
|
- output |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: glaive-code-assistant |
|
source: |
|
dataset_name: glaiveai/glaive-code-assistant |
|
source_name: huggingface |
|
embeddings: |
|
- path: question |
|
embedding: gte-small |
|
- path: answer |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- answer |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: glaive-function-calling-v2 |
|
source: |
|
dataset_name: lilacai/glaive-function-calling-v2-sharegpt |
|
source_name: huggingface |
|
embeddings: |
|
- path: |
|
- conversations |
|
- '*' |
|
- value |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversations |
|
- '*' |
|
- value |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: open-assistant-conversations-2 |
|
source: |
|
dataset_name: OpenAssistant/oasst2 |
|
source_name: huggingface |
|
embeddings: |
|
- path: text |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- text |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: lmsys-chat-1m |
|
source: |
|
dataset_name: lmsys/lmsys-chat-1m |
|
source_name: huggingface |
|
embeddings: |
|
- path: |
|
- conversation |
|
- '*' |
|
- content |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversation |
|
- '*' |
|
- content |
|
tags: |
|
- logs |
|
- namespace: lilac |
|
name: OpenOrca |
|
source: |
|
dataset_name: Open-Orca/OpenOrca |
|
source_name: huggingface |
|
embeddings: |
|
- path: question |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- response |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: OpenHermes-2.5 |
|
source: |
|
dataset_name: teknium/OpenHermes-2.5 |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversations |
|
- '*' |
|
- value |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: SlimOrca |
|
source: |
|
dataset_name: Open-Orca/SlimOrca |
|
source_name: huggingface |
|
embeddings: |
|
- path: |
|
- conversations |
|
- '*' |
|
- value |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversations |
|
- '*' |
|
- value |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: UltraChat-200k |
|
source: |
|
dataset_name: HuggingFaceH4/ultrachat_200k |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- - messages |
|
- '*' |
|
- content |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: roblox_luau_corpus |
|
source: |
|
dataset_name: Roblox/luau_corpus |
|
source_name: huggingface |
|
embeddings: |
|
- path: prompt |
|
embedding: gte-small |
|
- path: completion |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- prompt |
|
- completion |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: hncomments-1m |
|
source: |
|
dataset_name: OpenPipe/hacker-news |
|
sample_size: 1000000 |
|
source_name: huggingface |
|
embeddings: |
|
- path: text |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- text |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: MMLU |
|
source: |
|
dataset_name: cais/mmlu |
|
config_name: all |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- - choices |
|
- '*' |
|
- answer |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: ARC-Easy |
|
source: |
|
dataset_name: allenai/ai2_arc |
|
config_name: ARC-Easy |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- - choices |
|
- text |
|
- '*' |
|
- answerKey |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: ARC-Challenge |
|
source: |
|
dataset_name: allenai/ai2_arc |
|
config_name: ARC-Challenge |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- - choices |
|
- text |
|
- '*' |
|
- answerKey |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: HellaSwag |
|
source: |
|
dataset_name: Rowan/hellaswag |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- ctx |
|
- ctx_a |
|
- ctx_b |
|
- - endings |
|
- '*' |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: HumanEval |
|
source: |
|
dataset_name: openai_humaneval |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- prompt |
|
- canonical_solution |
|
- test |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: mbpp |
|
source: |
|
dataset_name: mbpp |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- code |
|
- text |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: TruthfulQA-MultipleChoice |
|
source: |
|
dataset_name: truthful_qa |
|
config_name: multiple_choice |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- - mc1_targets |
|
- choices |
|
- '*' |
|
- - mc2_targets |
|
- choices |
|
- '*' |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: TruthfulQA-Generation |
|
source: |
|
dataset_name: truthful_qa |
|
config_name: generation |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- - correct_answers |
|
- '*' |
|
- - incorrect_answers |
|
- '*' |
|
- source |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: GSM8K-main |
|
source: |
|
dataset_name: gsm8k |
|
config_name: main |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- answer |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: GSM8K-socratic |
|
source: |
|
dataset_name: gsm8k |
|
config_name: socratic |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- question |
|
- answer |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: WinoGrande |
|
source: |
|
dataset_name: winogrande |
|
config_name: winogrande_xl |
|
source_name: huggingface |
|
settings: |
|
ui: |
|
media_paths: |
|
- sentence |
|
- option1 |
|
- option2 |
|
- answer |
|
tags: |
|
- eval |
|
- namespace: lilac |
|
name: databricks-dolly-15k-curated-en |
|
source: |
|
dataset_name: argilla/databricks-dolly-15k-curated-en |
|
source_name: huggingface |
|
embeddings: |
|
- path: original-instruction |
|
embedding: gte-small |
|
- path: original-context |
|
embedding: gte-small |
|
- path: original-response |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- original-instruction |
|
- original-context |
|
- original-response |
|
- - new-instruction |
|
- value |
|
- '*' |
|
- - new-context |
|
- value |
|
- '*' |
|
- - new-response |
|
- value |
|
- '*' |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: mosaic-instruct-v3 |
|
source: |
|
dataset_name: mosaicml/instruct-v3 |
|
source_name: huggingface |
|
embeddings: |
|
- path: prompt |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- prompt |
|
- response |
|
tags: |
|
- datasets |
|
- namespace: lilac |
|
name: GAIR-lima |
|
source: |
|
dataset_name: GAIR/lima |
|
source_name: huggingface |
|
embeddings: |
|
- path: |
|
- conversations |
|
- '*' |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- - conversations |
|
- '*' |
|
markdown_paths: [] |
|
- namespace: lilac |
|
name: dolphin |
|
source: |
|
dataset_name: cognitivecomputations/dolphin |
|
config_name: flan1m-alpaca-uncensored |
|
source_name: huggingface |
|
embeddings: |
|
- path: instruction |
|
embedding: gte-small |
|
settings: |
|
ui: |
|
media_paths: |
|
- instruction |
|
- input |
|
- output |
|
tags: |
|
- datasets |
|
use_garden: true |
|
signals: |
|
- signal_name: text_statistics |
|
- signal_name: lang_detection |
|
concept_model_cache_embeddings: |
|
- gte-small |
|
- gte-base |
|
- sbert |
|
- openai |
|
- cohere |
|
clusters: |
|
- dataset_namespace: lilac |
|
dataset_name: Capybara |
|
input_path: !!python/tuple |
|
- conversation |
|
- '*' |
|
- input |
|
- dataset_namespace: lilac |
|
dataset_name: glaive-code-assistant |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: glaive-function-calling-v2 |
|
input_selector: |
|
format: sharegpt |
|
selector: human |
|
output_path: !!python/tuple |
|
- conversations__clusters |
|
- dataset_namespace: lilac |
|
dataset_name: open-assistant-conversations-2 |
|
input_path: !!python/tuple |
|
- text |
|
- dataset_namespace: lilac |
|
dataset_name: lmsys-chat-1m |
|
input_selector: |
|
format: openai_conversation_json |
|
selector: user |
|
output_path: !!python/tuple |
|
- conversation__clusters |
|
- dataset_namespace: lilac |
|
dataset_name: OpenOrca |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: OpenHermes-2.5 |
|
input_selector: |
|
format: sharegpt |
|
selector: human |
|
output_path: !!python/tuple |
|
- conversations__clusters |
|
- dataset_namespace: lilac |
|
dataset_name: SlimOrca |
|
input_selector: |
|
format: sharegpt |
|
selector: human |
|
output_path: !!python/tuple |
|
- conversations__clusters |
|
- dataset_namespace: lilac |
|
dataset_name: databricks-dolly-15k-curated-en |
|
input_path: !!python/tuple |
|
- original-instruction |
|
- dataset_namespace: lilac |
|
dataset_name: mosaic-instruct-v3 |
|
input_path: !!python/tuple |
|
- prompt |
|
- dataset_namespace: lilac |
|
dataset_name: GAIR-lima |
|
input_path: !!python/tuple |
|
- conversations |
|
- '*' |
|
- dataset_namespace: lilac |
|
dataset_name: dolphin |
|
input_path: !!python/tuple |
|
- input |
|
- dataset_namespace: lilac |
|
dataset_name: UltraChat-200k |
|
input_selector: |
|
format: openai_json |
|
selector: user |
|
output_path: !!python/tuple |
|
- messages__clusters |
|
- dataset_namespace: lilac |
|
dataset_name: hncomments-1m |
|
input_path: !!python/tuple |
|
- text |
|
- dataset_namespace: lilac |
|
dataset_name: roblox_luau_corpus |
|
input_path: !!python/tuple |
|
- prompt |
|
- dataset_namespace: lilac |
|
dataset_name: roblox_luau_corpus |
|
input_path: !!python/tuple |
|
- completion |
|
- dataset_namespace: lilac |
|
dataset_name: MMLU |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: ARC-Easy |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: ARC-Challenge |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: HellaSwag |
|
input_path: !!python/tuple |
|
- ctx |
|
- dataset_namespace: lilac |
|
dataset_name: HumanEval |
|
input_path: !!python/tuple |
|
- prompt |
|
- dataset_namespace: lilac |
|
dataset_name: mbpp |
|
input_path: !!python/tuple |
|
- text |
|
- dataset_namespace: lilac |
|
dataset_name: TruthfulQA-Generation |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: TruthfulQA-MultipleChoice |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: GSM8K-main |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: GSM8K-socratic |
|
input_path: !!python/tuple |
|
- question |
|
- dataset_namespace: lilac |
|
dataset_name: WinoGrande |
|
input_path: !!python/tuple |
|
- sentence |
|
|