# lilac / data /lilac.yml
# nsthorat's picture
# Push
# b1494e2
# raw
# history blame
# 50.7 kB
# Lilac project config.
# See https://lilacml.com/api_reference/index.html#lilac.Config for details.
datasets:
# imdb: HuggingFace dataset `imdb`. Every embedding and signal targets `text`.
- namespace: lilac
  name: imdb
  source:
    source_name: huggingface
    dataset_name: imdb
  embeddings:
  - embedding: gte-small
    path: text
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  # Concept scores, all using the gte-small embedding.
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# OpenAssistant conversations: HuggingFace dataset `OpenAssistant/oasst1`.
# Every embedding and signal targets `text`.
# NOTE(review): the dataset name below is spelled with three "s"
# ("asssistant"). It is left unchanged because the name identifies the
# dataset; confirm whether a rename is wanted before touching it.
- namespace: lilac
  name: open-asssistant-conversations
  source:
    dataset_name: OpenAssistant/oasst1
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  # Concept scores, all using the gte-small embedding. Each concept is
  # listed exactly once (a repeated negative-sentiment entry was removed).
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# wikitext-2-raw-v1: HuggingFace dataset `wikitext`, config
# `wikitext-2-raw-v1`. Every embedding and signal targets `text`.
- namespace: lilac
  name: wikitext-2-raw-v1
  source:
    source_name: huggingface
    dataset_name: wikitext
    config_name: wikitext-2-raw-v1
  embeddings:
  - embedding: gte-small
    path: text
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  - path: text
    signal:
      signal_name: text_statistics
  # Concept scores, all using the gte-small embedding.
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# squad_v2: HuggingFace dataset `squad_v2`. Signals cover three fields:
# `context`, `question`, and every element of `answers.text`.
# NOTE(review): `embeddings` lists only `context`, yet the concept_score
# signals on `question` and `answers.text.*` also name gte-small. Confirm
# those embeddings are produced elsewhere or on demand.
- namespace: lilac
  name: squad_v2
  source:
    source_name: huggingface
    dataset_name: squad_v2
  embeddings:
  - embedding: gte-small
    path: context
  signals:
  # --- context ---
  - path: context
    signal:
      signal_name: near_dup
  - path: context
    signal:
      signal_name: pii
  - path: context
    signal:
      signal_name: lang_detection
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: context
    signal:
      signal_name: text_statistics
  # --- question: basic signals ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  - path: question
    signal:
      signal_name: text_statistics
  # --- answers.text.*: basic signals ---
  - path: [answers, text, '*']
    signal:
      signal_name: near_dup
  - path: [answers, text, '*']
    signal:
      signal_name: pii
  - path: [answers, text, '*']
    signal:
      signal_name: lang_detection
  - path: [answers, text, '*']
    signal:
      signal_name: text_statistics
  # --- question: concept scores ---
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- answers.text.*: concept scores ---
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [answers, text, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  settings:
    ui:
      media_paths:
      - context
      - question
      - [answers, text, '*']
    preferred_embedding: gte-small
# databricks-dolly-15k-curated-en: HuggingFace dataset
# `argilla/databricks-dolly-15k-curated-en`. Signals cover six fields: the
# three `original-*` columns and every element of the three `new-*.value`
# lists.
# NOTE(review): `embeddings` lists only `original-context` and
# `new-context.value.*`, yet concept_score signals on the instruction and
# response fields also name gte-small. Confirm those embeddings are produced
# elsewhere or on demand.
- namespace: lilac
  name: databricks-dolly-15k-curated-en
  source:
    source_name: huggingface
    dataset_name: argilla/databricks-dolly-15k-curated-en
  embeddings:
  - embedding: gte-small
    path: original-context
  - embedding: gte-small
    path: [new-context, value, '*']
  signals:
  # --- original-instruction: basic signals ---
  - path: original-instruction
    signal:
      signal_name: near_dup
  - path: original-instruction
    signal:
      signal_name: pii
  - path: original-instruction
    signal:
      signal_name: lang_detection
  - path: original-instruction
    signal:
      signal_name: text_statistics
  # --- original-context: basic signals ---
  - path: original-context
    signal:
      signal_name: near_dup
  - path: original-context
    signal:
      signal_name: pii
  - path: original-context
    signal:
      signal_name: lang_detection
  - path: original-context
    signal:
      signal_name: text_statistics
  # --- original-response: basic signals ---
  - path: original-response
    signal:
      signal_name: near_dup
  - path: original-response
    signal:
      signal_name: pii
  - path: original-response
    signal:
      signal_name: lang_detection
  - path: original-response
    signal:
      signal_name: text_statistics
  # --- new-instruction.value.*: basic signals ---
  - path: [new-instruction, value, '*']
    signal:
      signal_name: near_dup
  - path: [new-instruction, value, '*']
    signal:
      signal_name: pii
  - path: [new-instruction, value, '*']
    signal:
      signal_name: lang_detection
  - path: [new-instruction, value, '*']
    signal:
      signal_name: text_statistics
  # --- new-context.value.*: basic signals ---
  - path: [new-context, value, '*']
    signal:
      signal_name: near_dup
  - path: [new-context, value, '*']
    signal:
      signal_name: pii
  - path: [new-context, value, '*']
    signal:
      signal_name: lang_detection
  - path: [new-context, value, '*']
    signal:
      signal_name: text_statistics
  # --- new-response.value.*: basic signals ---
  - path: [new-response, value, '*']
    signal:
      signal_name: near_dup
  - path: [new-response, value, '*']
    signal:
      signal_name: pii
  - path: [new-response, value, '*']
    signal:
      signal_name: lang_detection
  - path: [new-response, value, '*']
    signal:
      signal_name: text_statistics
  # --- original-instruction: concept scores ---
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: original-instruction
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- original-context: concept scores ---
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: original-context
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- original-response: concept scores ---
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: original-response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- new-instruction.value.*: concept scores ---
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [new-instruction, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- new-context.value.*: concept scores ---
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [new-context, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  # --- new-response.value.*: concept scores ---
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [new-response, value, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  settings:
    ui:
      media_paths:
      - original-instruction
      - original-context
      - original-response
      - [new-instruction, value, '*']
      - [new-context, value, '*']
      - [new-response, value, '*']
    preferred_embedding: gte-small
# piqa: HuggingFace dataset `piqa`. The same embedding and signal set is
# configured for each of `goal`, `sol1`, and `sol2`.
- namespace: lilac
  name: piqa
  source:
    source_name: huggingface
    dataset_name: piqa
  embeddings:
  - embedding: gte-small
    path: goal
  - embedding: gte-small
    path: sol1
  - embedding: gte-small
    path: sol2
  signals:
  # --- goal ---
  - path: goal
    signal:
      signal_name: near_dup
  - path: goal
    signal:
      signal_name: pii
  - path: goal
    signal:
      signal_name: lang_detection
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: goal
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: goal
    signal:
      signal_name: text_statistics
  # --- sol1 ---
  - path: sol1
    signal:
      signal_name: near_dup
  - path: sol1
    signal:
      signal_name: pii
  - path: sol1
    signal:
      signal_name: lang_detection
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: sol1
    signal:
      signal_name: text_statistics
  # --- sol2 ---
  - path: sol2
    signal:
      signal_name: near_dup
  - path: sol2
    signal:
      signal_name: pii
  - path: sol2
    signal:
      signal_name: lang_detection
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: sol2
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - sol1
      - sol2
      - goal
    preferred_embedding: gte-small
# OpenOrca-100k: HuggingFace dataset `Open-Orca/OpenOrca` with
# sample_size 100000. The same embedding and signal set is configured for
# `question` and `response`.
- namespace: lilac
  name: OpenOrca-100k
  source:
    source_name: huggingface
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
  embeddings:
  - embedding: gte-small
    path: question
  - embedding: gte-small
    path: response
  signals:
  # --- question ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: question
    signal:
      signal_name: text_statistics
  # --- response ---
  - path: response
    signal:
      signal_name: near_dup
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      signal_name: lang_detection
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: response
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: response
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - response
    preferred_embedding: gte-small
# opus100-en-es-validation: HuggingFace dataset `opus100`, config `en-es`,
# split `validation`. The same embedding and signal set is configured for
# `translation.en` and `translation.es`.
- namespace: lilac
  name: opus100-en-es-validation
  source:
    source_name: huggingface
    dataset_name: opus100
    config_name: en-es
    split: validation
  embeddings:
  - embedding: gte-small
    path: [translation, en]
  - embedding: gte-small
    path: [translation, es]
  signals:
  # --- translation.en ---
  - path: [translation, en]
    signal:
      signal_name: near_dup
  - path: [translation, en]
    signal:
      signal_name: pii
  - path: [translation, en]
    signal:
      signal_name: lang_detection
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [translation, en]
    signal:
      signal_name: text_statistics
  # --- translation.es ---
  - path: [translation, es]
    signal:
      signal_name: near_dup
  - path: [translation, es]
    signal:
      signal_name: pii
  - path: [translation, es]
    signal:
      signal_name: lang_detection
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [translation, es]
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - [translation, es]
      - [translation, en]
    preferred_embedding: gte-small
# mmlu_professional_law: HuggingFace dataset `cais/mmlu`, config
# `professional_law`. The same embedding and signal set is configured for
# `question` and for every element of `choices`.
- namespace: lilac
  name: mmlu_professional_law
  source:
    dataset_name: cais/mmlu
    config_name: professional_law
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  - path: [choices, '*']
    embedding: gte-small
  signals:
  # --- question ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: question
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: question
    signal:
      signal_name: text_statistics
  # --- choices.* ---
  # Each concept is listed exactly once (a repeated negative-sentiment entry
  # was removed).
  - path: [choices, '*']
    signal:
      signal_name: near_dup
  - path: [choices, '*']
    signal:
      signal_name: pii
  - path: [choices, '*']
    signal:
      signal_name: lang_detection
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: [choices, '*']
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - [choices, '*']
    preferred_embedding: gte-small
# pile-of-law-r-legaladvice: HuggingFace dataset `pile-of-law/pile-of-law`,
# config `r_legaladvice`. Every embedding and signal targets `text`.
- namespace: lilac
  name: pile-of-law-r-legaladvice
  source:
    source_name: huggingface
    dataset_name: pile-of-law/pile-of-law
    config_name: r_legaladvice
  embeddings:
  - embedding: gte-small
    path: text
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  # Concept scores, all using the gte-small embedding.
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# science-qa-derek-thomas: HuggingFace dataset `derek-thomas/ScienceQA`.
# Every embedding and signal targets `lecture`.
- namespace: lilac
  name: science-qa-derek-thomas
  source:
    source_name: huggingface
    dataset_name: derek-thomas/ScienceQA
  embeddings:
  - embedding: gte-small
    path: lecture
  signals:
  - path: lecture
    signal:
      signal_name: near_dup
  - path: lecture
    signal:
      signal_name: pii
  - path: lecture
    signal:
      signal_name: lang_detection
  - path: lecture
    signal:
      signal_name: text_statistics
  # Concept scores, all using the gte-small embedding.
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: lecture
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  settings:
    ui:
      media_paths:
      - lecture
    preferred_embedding: gte-small
# enron-emails: HuggingFace dataset `EleutherAI/pile`, config `enron_emails`,
# with sample_size 100000. Every embedding and signal targets `text`.
- namespace: lilac
  name: enron-emails
  source:
    source_name: huggingface
    dataset_name: EleutherAI/pile
    config_name: enron_emails
    sample_size: 100000
  embeddings:
  - embedding: gte-small
    path: text
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  # Concept scores, all using the gte-small embedding.
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: text
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# the_movies_dataset: loaded through the `csv` source from a single remote
# file. Every embedding and signal targets `overview`.
- namespace: lilac
  name: the_movies_dataset
  source:
    source_name: csv
    filepaths:
    - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
  embeddings:
  - embedding: gte-small
    path: overview
  signals:
  - path: overview
    signal:
      signal_name: near_dup
  - path: overview
    signal:
      signal_name: pii
  - path: overview
    signal:
      signal_name: lang_detection
  - path: overview
    signal:
      signal_name: text_statistics
  # Concept scores, all using the gte-small embedding.
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: legal-termination
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: negative-sentiment
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: non-english
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: positive-sentiment
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: profanity
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: question
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: source-code
      embedding: gte-small
  - path: overview
    signal:
      signal_name: concept_score
      namespace: lilac
      concept_name: toxicity
      embedding: gte-small
  settings:
    ui:
      media_paths:
      - overview
    preferred_embedding: gte-small