diff --git "a/data/lilac.yml" "b/data/lilac.yml" --- "a/data/lilac.yml" +++ "b/data/lilac.yml" @@ -1,14 +1,8 @@ datasets: - namespace: lilac name: imdb - tags: [] source: dataset_name: imdb - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: text @@ -16,78 +10,60 @@ datasets: signals: - path: text signal: - threshold: 0.85 signal_name: near_dup - path: text signal: signal_name: pii - path: text signal: - split_by_paragraph: false signal_name: lang_detection - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -97,136 +73,102 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: @@ -234,25 +176,18 @@ datasets: - path: text signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - text markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: open-asssistant-conversations - tags: [] source: dataset_name: OpenAssistant/oasst1 - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: text @@ -260,86 +195,66 @@ datasets: signals: - path: text signal: - threshold: 0.85 signal_name: near_dup - path: text signal: signal_name: pii - path: text signal: - split_by_paragraph: false signal_name: lang_detection - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -349,64 +264,48 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -414,25 +313,19 @@ datasets: - path: text signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - text markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: wikitext-2-raw-v1 - tags: [] source: dataset_name: wikitext config_name: wikitext-2-raw-v1 - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: text @@ -440,14 +333,12 @@ datasets: signals: - path: text signal: - threshold: 0.85 signal_name: near_dup - path: text signal: signal_name: pii - path: text signal: - split_by_paragraph: false signal_name: lang_detection - path: text signal: @@ -457,128 +348,96 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -586,25 +445,18 @@ datasets: - path: text signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - text markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: databricks-dolly-15k-curated-en - tags: [] source: dataset_name: argilla/databricks-dolly-15k-curated-en - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: original-context @@ -631,42 +483,36 @@ datasets: signals: - path: original-instruction signal: - threshold: 0.85 signal_name: near_dup - path: original-instruction signal: signal_name: pii - path: original-instruction signal: - split_by_paragraph: false signal_name: lang_detection - path: original-instruction signal: signal_name: text_statistics - path: original-context signal: - threshold: 0.85 signal_name: near_dup - path: original-context signal: signal_name: pii - path: original-context signal: - split_by_paragraph: false signal_name: lang_detection - path: original-context signal: signal_name: text_statistics - path: original-response signal: - threshold: 0.85 signal_name: near_dup - path: original-response signal: signal_name: pii - path: original-response signal: - split_by_paragraph: false signal_name: lang_detection - path: original-response signal: @@ -676,7 +522,6 @@ datasets: - value - '*' signal: - threshold: 0.85 signal_name: near_dup - path: - new-instruction @@ -689,7 +534,6 @@ datasets: - value - '*' signal: - split_by_paragraph: false signal_name: lang_detection - path: - new-instruction @@ -702,7 +546,6 @@ datasets: - value - '*' signal: - threshold: 0.85 signal_name: near_dup - path: - new-context @@ -715,7 +558,6 @@ datasets: - value - '*' signal: - split_by_paragraph: false signal_name: lang_detection - path: - new-context @@ -728,7 +570,6 @@ datasets: - value - '*' signal: - threshold: 0.85 signal_name: near_dup - path: - new-response @@ -741,7 +582,6 @@ datasets: - value - '*' signal: - split_by_paragraph: false signal_name: lang_detection - path: - new-response @@ -754,192 +594,144 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -949,8 +741,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -960,8 +750,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -971,8 +759,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -982,8 +768,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -993,8 +777,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1004,8 +786,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1015,8 +795,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1026,8 +804,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1037,8 +813,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1048,8 +822,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1059,8 +831,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1070,8 +840,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1081,8 +849,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1092,8 +858,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1103,8 +867,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1114,8 +876,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1125,8 +885,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1136,8 +894,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1147,8 +903,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1158,8 +912,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1169,8 +921,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1180,8 +930,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1191,8 +939,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1202,200 +948,150 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1405,8 +1101,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1416,8 +1110,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1427,8 +1119,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1438,8 +1128,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1449,8 +1137,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1460,8 +1146,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1471,8 +1155,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1482,8 +1164,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1493,8 +1173,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1504,8 +1182,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1515,8 +1191,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1526,8 +1200,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1537,8 +1209,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1548,8 +1218,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1559,8 +1227,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-context @@ -1570,8 +1236,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1581,8 +1245,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1592,8 +1254,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1603,8 +1263,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1614,8 +1272,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1625,8 +1281,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1636,8 +1290,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1647,8 +1299,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1658,136 +1308,102 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1797,8 +1413,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1808,8 +1422,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1819,8 +1431,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1830,8 +1440,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1841,8 +1449,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1852,8 +1458,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1863,8 +1467,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -1874,8 +1476,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1885,8 +1485,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1896,8 +1494,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1907,8 +1503,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1918,8 +1512,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1929,8 +1521,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1940,8 +1530,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1951,8 +1539,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -1962,136 +1548,102 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2101,8 +1653,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2112,8 +1662,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2123,8 +1671,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2134,8 +1680,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2145,8 +1689,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2156,8 +1698,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2167,8 +1707,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2178,8 +1716,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2189,8 +1725,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2200,8 +1734,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2211,8 +1743,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2222,8 +1752,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2233,8 +1761,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2244,8 +1770,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2255,8 +1779,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2266,136 +1788,102 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2405,8 +1893,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2416,8 +1902,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2427,8 +1911,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2438,8 +1920,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2449,8 +1929,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2460,8 +1938,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2471,8 +1947,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2482,8 +1956,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2493,8 +1965,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2504,8 +1974,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2515,8 +1983,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2526,8 +1992,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2537,8 +2001,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2548,8 +2010,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2559,8 +2019,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2570,136 +2028,102 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2709,8 +2133,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2720,8 +2142,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2731,8 +2151,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2742,8 +2160,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2753,8 +2169,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2764,8 +2178,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2775,8 +2187,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -2786,8 +2196,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2797,8 +2205,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2808,8 +2214,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2819,8 +2223,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2830,8 +2232,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2841,8 +2241,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2852,8 +2250,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2863,8 +2259,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -2874,8 +2268,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: @@ -2885,64 +2277,48 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-context signal: @@ -2955,64 +2331,48 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3028,8 +2388,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3039,8 +2397,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3050,8 +2406,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3061,8 +2415,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3072,8 +2424,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3083,8 +2433,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3094,8 +2442,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3105,8 +2451,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -3128,8 +2472,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3139,8 +2481,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3150,8 +2490,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3161,8 +2499,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3172,8 +2508,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3183,8 +2517,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3194,8 +2526,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3205,154 +2535,114 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: original-context signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: original-response signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3360,8 +2650,6 @@ datasets: - '*' signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: - new-instruction @@ -3371,8 +2659,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3382,8 +2668,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3393,8 +2677,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3404,8 +2686,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3415,8 +2695,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3426,8 +2704,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3437,8 +2713,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-instruction @@ -3448,8 +2722,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - new-context @@ -3457,8 +2729,6 @@ datasets: - '*' signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: - new-response @@ -3466,8 +2736,6 @@ datasets: - '*' signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: - new-response @@ -3477,8 +2745,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3488,8 +2754,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3499,8 +2763,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3510,8 +2772,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3521,8 +2781,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3532,8 +2790,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3543,8 +2799,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - new-response @@ -3554,8 +2808,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score settings: ui: @@ -3573,17 +2825,12 @@ datasets: - value - '*' markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: piqa - tags: [] source: dataset_name: piqa - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: goal @@ -3595,234 +2842,180 @@ datasets: signals: - path: goal signal: - threshold: 0.85 signal_name: near_dup - path: goal signal: signal_name: pii - path: goal signal: - split_by_paragraph: false signal_name: lang_detection - path: goal signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: goal signal: signal_name: text_statistics - path: sol1 signal: - threshold: 0.85 signal_name: near_dup - path: sol1 signal: signal_name: pii - path: sol1 signal: - split_by_paragraph: false signal_name: lang_detection - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: sol1 signal: signal_name: text_statistics - path: sol2 signal: - threshold: 0.85 signal_name: near_dup - path: sol2 signal: signal_name: pii - path: sol2 signal: - split_by_paragraph: false signal_name: lang_detection - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: sol2 signal: @@ -3834,17 +3027,11 @@ datasets: - sol2 - goal markdown_paths: [] - preferred_embedding: gte-small - namespace: lilac name: OpenOrca-100k - tags: [] source: dataset_name: Open-Orca/OpenOrca - config_name: null - split: null sample_size: 100000 - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: question @@ -3854,156 +3041,120 @@ datasets: signals: - path: question signal: - threshold: 0.85 signal_name: near_dup - path: question signal: signal_name: pii - path: question signal: - split_by_paragraph: false signal_name: lang_detection - path: question signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: question signal: signal_name: text_statistics - path: response signal: - threshold: 0.85 signal_name: near_dup - path: response signal: signal_name: pii - path: response signal: - split_by_paragraph: false signal_name: lang_detection - path: response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: response signal: @@ -4013,128 +3164,96 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: question signal: @@ -4145,14 +3264,10 @@ datasets: - path: question signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: response signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: @@ -4160,17 +3275,14 @@ datasets: - question - response markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: opus100-en-es-validation - tags: [] source: dataset_name: opus100 config_name: en-es split: validation - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: @@ -4186,7 +3298,6 @@ datasets: - translation - en signal: - threshold: 0.85 signal_name: near_dup - path: - translation @@ -4197,7 +3308,6 @@ datasets: - translation - en signal: - split_by_paragraph: false signal_name: lang_detection - path: - translation @@ -4206,8 +3316,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4216,8 +3324,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - translation @@ -4226,8 +3332,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4236,8 +3340,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - translation @@ -4246,8 +3348,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - translation @@ -4256,8 +3356,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - translation @@ -4266,8 +3364,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4276,8 +3372,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4288,7 +3382,6 @@ datasets: - translation - es signal: - threshold: 0.85 signal_name: near_dup - path: - translation @@ -4299,7 +3392,6 @@ datasets: - translation - es signal: - split_by_paragraph: false signal_name: lang_detection - path: - translation @@ -4308,8 +3400,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4318,8 +3408,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - translation @@ -4328,8 +3416,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4338,8 +3424,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - translation @@ -4348,8 +3432,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - translation @@ -4358,8 +3440,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - translation @@ -4368,8 +3448,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4378,8 +3456,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4393,8 +3469,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - translation @@ -4403,8 +3477,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4413,8 +3485,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - translation @@ -4423,8 +3493,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4433,8 +3501,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4443,8 +3509,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - translation @@ -4453,8 +3517,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - translation @@ -4463,8 +3525,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4473,8 +3533,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - translation @@ -4483,8 +3541,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4493,8 +3549,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - translation @@ -4503,8 +3557,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - translation @@ -4513,8 +3565,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4523,8 +3573,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - translation @@ -4533,8 +3581,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - translation @@ -4543,8 +3589,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - translation @@ -4561,16 +3605,12 @@ datasets: - es signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: - translation - en signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: @@ -4580,17 +3620,12 @@ datasets: - - translation - en markdown_paths: [] - preferred_embedding: gte-small + tags: + - machine-learning - namespace: lilac name: science-qa-derek-thomas - tags: [] source: dataset_name: derek-thomas/ScienceQA - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: lecture @@ -4598,14 +3633,12 @@ datasets: signals: - path: lecture signal: - threshold: 0.85 signal_name: near_dup - path: lecture signal: signal_name: pii - path: lecture signal: - split_by_paragraph: false signal_name: lang_detection - path: lecture signal: @@ -4615,128 +3648,96 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: lecture signal: @@ -4744,25 +3745,20 @@ datasets: - path: lecture signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - lecture markdown_paths: [] - preferred_embedding: gte-small + tags: + - science - namespace: lilac name: enron-emails - tags: [] source: dataset_name: EleutherAI/pile config_name: enron_emails - split: null sample_size: 100000 - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: text @@ -4770,78 +3766,60 @@ datasets: signals: - path: text signal: - threshold: 0.85 signal_name: near_dup - path: text signal: signal_name: pii - path: text signal: - split_by_paragraph: false signal_name: lang_detection - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -4851,64 +3829,48 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: text signal: @@ -4916,24 +3878,19 @@ datasets: - path: text signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - text markdown_paths: [] - preferred_embedding: gte-small + tags: + - business - namespace: lilac name: the_movies_dataset - tags: [] source: filepaths: - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv - delim: ',' - header: true - names: null source_name: csv embeddings: - path: overview @@ -4941,14 +3898,12 @@ datasets: signals: - path: overview signal: - threshold: 0.85 signal_name: near_dup - path: overview signal: signal_name: pii - path: overview signal: - split_by_paragraph: false signal_name: lang_detection - path: overview signal: @@ -4958,128 +3913,96 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: overview signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: overview signal: @@ -5087,25 +4010,18 @@ datasets: - path: overview signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: media_paths: - overview markdown_paths: [] - preferred_embedding: gte-small + tags: + - other - namespace: lilac name: textbook_quality_programming - tags: [] source: dataset_name: vikp/textbook_quality_programming - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false source_name: huggingface embeddings: - path: @@ -5133,13 +4049,11 @@ datasets: - outline - '*' signal: - threshold: 0.85 signal_name: near_dup - path: - outline - '*' signal: - split_by_paragraph: false signal_name: lang_detection - path: - outline @@ -5148,8 +4062,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - outline @@ -5158,8 +4070,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - outline @@ -5168,8 +4078,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - outline @@ -5178,8 +4086,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - outline @@ -5188,8 +4094,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - outline @@ -5198,8 +4102,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - outline @@ -5208,8 +4110,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - outline @@ -5218,8 +4118,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5235,13 +4133,11 @@ datasets: - concepts - '*' signal: - threshold: 0.85 signal_name: near_dup - path: - concepts - '*' signal: - split_by_paragraph: false signal_name: lang_detection - path: - concepts @@ -5250,8 +4146,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5260,8 +4154,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5270,8 +4162,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5280,8 +4170,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5290,8 +4178,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5300,8 +4186,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5310,8 +4194,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: - concepts @@ -5320,8 +4202,6 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: markdown signal: @@ -5331,75 +4211,57 @@ datasets: signal_name: text_statistics - path: markdown signal: - threshold: 0.85 signal_name: near_dup - path: markdown signal: - split_by_paragraph: false signal_name: lang_detection - path: markdown signal: embedding: gte-small namespace: lilac concept_name: legal-termination - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: non-english - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: profanity - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: question - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: source-code - version: null - draft: main signal_name: concept_score - path: markdown signal: embedding: gte-small namespace: lilac concept_name: toxicity - version: null - draft: main signal_name: concept_score - path: - outline @@ -5419,22 +4281,16 @@ datasets: - '*' signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: - concepts - '*' signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan - path: markdown signal: embedding: gte-small - min_cluster_size: 5 - umap_n_components: 10 signal_name: cluster_hdbscan settings: ui: @@ -5446,6 +4302,242 @@ datasets: - markdown markdown_paths: - markdown - preferred_embedding: gte-small -signals: [] -concept_model_cache_embeddings: [] + tags: + - machine-learning + - namespace: lilac + name: stanford-alpaca + source: + filepaths: + - https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json + source_name: json + embeddings: + - path: output + embedding: gte-small + - path: instruction + embedding: gte-small + - path: input + embedding: gte-small + signals: + - path: output + signal: + signal_name: pii + - path: output + signal: + signal_name: text_statistics + - path: output + signal: + signal_name: near_dup + - path: output + signal: + signal_name: lang_detection + - path: output + signal: + embedding: gte-small + signal_name: cluster_hdbscan + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: question + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + signal_name: concept_score + - path: output + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + signal_name: concept_score + - path: instruction + signal: + signal_name: pii + - path: instruction + signal: + signal_name: text_statistics + - path: instruction + signal: + signal_name: near_dup + - path: instruction + signal: + signal_name: lang_detection + - path: instruction + signal: + embedding: gte-small + signal_name: cluster_hdbscan + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + signal_name: concept_score + - path: instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + signal_name: concept_score + - path: input + signal: + signal_name: pii + - path: input + signal: + signal_name: text_statistics + - path: input + signal: + signal_name: near_dup + - path: input + signal: + signal_name: lang_detection + - path: input + signal: + embedding: gte-small + signal_name: cluster_hdbscan + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: question + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + signal_name: concept_score + - path: input + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + signal_name: concept_score + settings: + ui: + media_paths: + - output + - instruction + - input + markdown_paths: [] + - namespace: local + name: glaive + source: + dataset_name: glaiveai/glaive-code-assistant + source_name: huggingface + settings: + ui: + media_paths: + - question + - answer + - - answer_formatted + - answer + markdown_paths: [] + - namespace: lilac + name: glaive + source: + dataset_name: glaiveai/glaive-code-assistant + source_name: huggingface + settings: + ui: + media_paths: + - question + - answer + markdown_paths: []