davidberenstein1957 HF staff committed on
Commit
fd2f716
•
1 Parent(s): 629fdb8

refactor package folders

Browse files
Files changed (19) hide show
  1. app.py +1 -1
  2. pyproject.toml +1 -1
  3. src/{distilabel_dataset_generator → synthetic_dataset_generator}/__init__.py +0 -0
  4. src/{distilabel_dataset_generator → synthetic_dataset_generator}/_tabbedinterface.py +0 -0
  5. src/{distilabel_dataset_generator → synthetic_dataset_generator}/app.py +5 -5
  6. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/__init__.py +0 -0
  7. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/base.py +2 -2
  8. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/eval.py +5 -5
  9. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/faq.py +0 -0
  10. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/sft.py +5 -5
  11. src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/textcat.py +5 -5
  12. src/{distilabel_dataset_generator → synthetic_dataset_generator}/constants.py +0 -0
  13. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/__init__.py +0 -0
  14. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/base.py +1 -1
  15. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/embeddings.py +1 -1
  16. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/eval.py +3 -3
  17. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/sft.py +2 -2
  18. src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/textcat.py +3 -3
  19. src/{distilabel_dataset_generator → synthetic_dataset_generator}/utils.py +1 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from distilabel_dataset_generator.app import demo
2
 
3
  if __name__ == "__main__":
4
  demo.launch()
 
1
+ from synthetic_dataset_generator.app import demo
2
 
3
  if __name__ == "__main__":
4
  demo.launch()
pyproject.toml CHANGED
@@ -1,5 +1,5 @@
1
  [project]
2
- name = "distilabel-dataset-generator"
3
  version = "0.1.0"
4
  description = "Build datasets using natural language"
5
  authors = [
 
1
  [project]
2
+ name = "synthetic-dataset-generator"
3
  version = "0.1.0"
4
  description = "Build datasets using natural language"
5
  authors = [
src/{distilabel_dataset_generator → synthetic_dataset_generator}/__init__.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/_tabbedinterface.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/app.py RENAMED
@@ -1,8 +1,8 @@
1
- from distilabel_dataset_generator._tabbedinterface import TabbedInterface
2
- from distilabel_dataset_generator.apps.eval import app as eval_app
3
- from distilabel_dataset_generator.apps.faq import app as faq_app
4
- from distilabel_dataset_generator.apps.sft import app as sft_app
5
- from distilabel_dataset_generator.apps.textcat import app as textcat_app
6
 
7
  theme = "argilla/argilla-theme"
8
 
 
1
+ from synthetic_dataset_generator._tabbedinterface import TabbedInterface
2
+ from synthetic_dataset_generator.apps.eval import app as eval_app
3
+ from synthetic_dataset_generator.apps.faq import app as faq_app
4
+ from synthetic_dataset_generator.apps.sft import app as sft_app
5
+ from synthetic_dataset_generator.apps.textcat import app as textcat_app
6
 
7
  theme = "argilla/argilla-theme"
8
 
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/__init__.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/base.py RENAMED
@@ -10,8 +10,8 @@ from distilabel.distiset import Distiset
10
  from gradio import OAuthToken
11
  from huggingface_hub import HfApi, upload_file
12
 
13
- from distilabel_dataset_generator.constants import TEXTCAT_TASK
14
- from distilabel_dataset_generator.utils import (
15
  get_argilla_client,
16
  )
17
 
 
10
  from gradio import OAuthToken
11
  from huggingface_hub import HfApi, upload_file
12
 
13
+ from synthetic_dataset_generator.constants import TEXTCAT_TASK
14
+ from synthetic_dataset_generator.utils import (
15
  get_argilla_client,
16
  )
17
 
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/eval.py RENAMED
@@ -16,23 +16,23 @@ from distilabel.distiset import Distiset
16
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
17
  from huggingface_hub import HfApi
18
 
19
- from distilabel_dataset_generator.apps.base import (
20
  hide_success_message,
21
  show_success_message,
22
  validate_argilla_user_workspace_dataset,
23
  validate_push_to_hub,
24
  )
25
- from distilabel_dataset_generator.constants import DEFAULT_BATCH_SIZE
26
- from distilabel_dataset_generator.pipelines.embeddings import (
27
  get_embeddings,
28
  get_sentence_embedding_dimensions,
29
  )
30
- from distilabel_dataset_generator.pipelines.eval import (
31
  generate_pipeline_code,
32
  get_custom_evaluator,
33
  get_ultrafeedback_evaluator,
34
  )
35
- from distilabel_dataset_generator.utils import (
36
  column_to_list,
37
  extract_column_names,
38
  get_argilla_client,
 
16
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
17
  from huggingface_hub import HfApi
18
 
19
+ from synthetic_dataset_generator.apps.base import (
20
  hide_success_message,
21
  show_success_message,
22
  validate_argilla_user_workspace_dataset,
23
  validate_push_to_hub,
24
  )
25
+ from synthetic_dataset_generator.constants import DEFAULT_BATCH_SIZE
26
+ from synthetic_dataset_generator.pipelines.embeddings import (
27
  get_embeddings,
28
  get_sentence_embedding_dimensions,
29
  )
30
+ from synthetic_dataset_generator.pipelines.eval import (
31
  generate_pipeline_code,
32
  get_custom_evaluator,
33
  get_ultrafeedback_evaluator,
34
  )
35
+ from synthetic_dataset_generator.utils import (
36
  column_to_list,
37
  extract_column_names,
38
  get_argilla_client,
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/faq.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/sft.py RENAMED
@@ -9,25 +9,25 @@ from datasets import Dataset
9
  from distilabel.distiset import Distiset
10
  from huggingface_hub import HfApi
11
 
12
- from distilabel_dataset_generator.apps.base import (
13
  hide_success_message,
14
  show_success_message,
15
  validate_argilla_user_workspace_dataset,
16
  validate_push_to_hub,
17
  )
18
- from distilabel_dataset_generator.constants import DEFAULT_BATCH_SIZE, SFT_AVAILABLE
19
- from distilabel_dataset_generator.pipelines.embeddings import (
20
  get_embeddings,
21
  get_sentence_embedding_dimensions,
22
  )
23
- from distilabel_dataset_generator.pipelines.sft import (
24
  DEFAULT_DATASET_DESCRIPTIONS,
25
  generate_pipeline_code,
26
  get_magpie_generator,
27
  get_prompt_generator,
28
  get_response_generator,
29
  )
30
- from distilabel_dataset_generator.utils import (
31
  get_argilla_client,
32
  get_org_dropdown,
33
  swap_visibility,
 
9
  from distilabel.distiset import Distiset
10
  from huggingface_hub import HfApi
11
 
12
+ from synthetic_dataset_generator.apps.base import (
13
  hide_success_message,
14
  show_success_message,
15
  validate_argilla_user_workspace_dataset,
16
  validate_push_to_hub,
17
  )
18
+ from synthetic_dataset_generator.constants import DEFAULT_BATCH_SIZE, SFT_AVAILABLE
19
+ from synthetic_dataset_generator.pipelines.embeddings import (
20
  get_embeddings,
21
  get_sentence_embedding_dimensions,
22
  )
23
+ from synthetic_dataset_generator.pipelines.sft import (
24
  DEFAULT_DATASET_DESCRIPTIONS,
25
  generate_pipeline_code,
26
  get_magpie_generator,
27
  get_prompt_generator,
28
  get_response_generator,
29
  )
30
+ from synthetic_dataset_generator.utils import (
31
  get_argilla_client,
32
  get_org_dropdown,
33
  swap_visibility,
src/{distilabel_dataset_generator → synthetic_dataset_generator}/apps/textcat.py RENAMED
@@ -9,30 +9,30 @@ from datasets import ClassLabel, Dataset, Features, Sequence, Value
9
  from distilabel.distiset import Distiset
10
  from huggingface_hub import HfApi
11
 
12
- from distilabel_dataset_generator.constants import DEFAULT_BATCH_SIZE
13
- from src.distilabel_dataset_generator.apps.base import (
14
  hide_success_message,
15
  show_success_message,
16
  validate_argilla_user_workspace_dataset,
17
  validate_push_to_hub,
18
  )
19
- from src.distilabel_dataset_generator.pipelines.embeddings import (
20
  get_embeddings,
21
  get_sentence_embedding_dimensions,
22
  )
23
- from src.distilabel_dataset_generator.pipelines.textcat import (
24
  DEFAULT_DATASET_DESCRIPTIONS,
25
  generate_pipeline_code,
26
  get_labeller_generator,
27
  get_prompt_generator,
28
  get_textcat_generator,
29
  )
30
- from src.distilabel_dataset_generator.utils import (
31
  get_argilla_client,
32
  get_org_dropdown,
33
  get_preprocess_labels,
34
  swap_visibility,
35
  )
 
36
 
37
 
38
  def generate_system_prompt(dataset_description, temperature, progress=gr.Progress()):
 
9
  from distilabel.distiset import Distiset
10
  from huggingface_hub import HfApi
11
 
12
+ from src.synthetic_dataset_generator.apps.base import (
 
13
  hide_success_message,
14
  show_success_message,
15
  validate_argilla_user_workspace_dataset,
16
  validate_push_to_hub,
17
  )
18
+ from src.synthetic_dataset_generator.pipelines.embeddings import (
19
  get_embeddings,
20
  get_sentence_embedding_dimensions,
21
  )
22
+ from src.synthetic_dataset_generator.pipelines.textcat import (
23
  DEFAULT_DATASET_DESCRIPTIONS,
24
  generate_pipeline_code,
25
  get_labeller_generator,
26
  get_prompt_generator,
27
  get_textcat_generator,
28
  )
29
+ from src.synthetic_dataset_generator.utils import (
30
  get_argilla_client,
31
  get_org_dropdown,
32
  get_preprocess_labels,
33
  swap_visibility,
34
  )
35
+ from synthetic_dataset_generator.constants import DEFAULT_BATCH_SIZE
36
 
37
 
38
  def generate_system_prompt(dataset_description, temperature, progress=gr.Progress()):
src/{distilabel_dataset_generator → synthetic_dataset_generator}/constants.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/__init__.py RENAMED
File without changes
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/base.py RENAMED
@@ -1,4 +1,4 @@
1
- from distilabel_dataset_generator.constants import API_KEYS
2
 
3
  TOKEN_INDEX = 0
4
 
 
1
+ from synthetic_dataset_generator.constants import API_KEYS
2
 
3
  TOKEN_INDEX = 0
4
 
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/embeddings.py RENAMED
@@ -3,7 +3,7 @@ from typing import List
3
  from sentence_transformers import SentenceTransformer
4
  from sentence_transformers.models import StaticEmbedding
5
 
6
- from distilabel_dataset_generator.constants import STATIC_EMBEDDING_MODEL
7
 
8
  static_embedding = StaticEmbedding.from_model2vec(STATIC_EMBEDDING_MODEL)
9
  model = SentenceTransformer(modules=[static_embedding])
 
3
  from sentence_transformers import SentenceTransformer
4
  from sentence_transformers.models import StaticEmbedding
5
 
6
+ from synthetic_dataset_generator.constants import STATIC_EMBEDDING_MODEL
7
 
8
  static_embedding = StaticEmbedding.from_model2vec(STATIC_EMBEDDING_MODEL)
9
  model = SentenceTransformer(modules=[static_embedding])
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/eval.py RENAMED
@@ -5,9 +5,9 @@ from distilabel.steps.tasks import (
5
  UltraFeedback,
6
  )
7
 
8
- from distilabel_dataset_generator.constants import BASE_URL, MODEL
9
- from distilabel_dataset_generator.pipelines.base import _get_next_api_key
10
- from distilabel_dataset_generator.utils import extract_column_names
11
 
12
 
13
  def get_ultrafeedback_evaluator(aspect, is_sample):
 
5
  UltraFeedback,
6
  )
7
 
8
+ from synthetic_dataset_generator.constants import BASE_URL, MODEL
9
+ from synthetic_dataset_generator.pipelines.base import _get_next_api_key
10
+ from synthetic_dataset_generator.utils import extract_column_names
11
 
12
 
13
  def get_ultrafeedback_evaluator(aspect, is_sample):
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/sft.py RENAMED
@@ -1,12 +1,12 @@
1
  from distilabel.llms import InferenceEndpointsLLM
2
  from distilabel.steps.tasks import ChatGeneration, Magpie, TextGeneration
3
 
4
- from distilabel_dataset_generator.constants import (
5
  BASE_URL,
6
  MAGPIE_PRE_QUERY_TEMPLATE,
7
  MODEL,
8
  )
9
- from distilabel_dataset_generator.pipelines.base import _get_next_api_key
10
 
11
  INFORMATION_SEEKING_PROMPT = (
12
  "You are an AI assistant designed to provide accurate and concise information on a wide"
 
1
  from distilabel.llms import InferenceEndpointsLLM
2
  from distilabel.steps.tasks import ChatGeneration, Magpie, TextGeneration
3
 
4
+ from synthetic_dataset_generator.constants import (
5
  BASE_URL,
6
  MAGPIE_PRE_QUERY_TEMPLATE,
7
  MODEL,
8
  )
9
+ from synthetic_dataset_generator.pipelines.base import _get_next_api_key
10
 
11
  INFORMATION_SEEKING_PROMPT = (
12
  "You are an AI assistant designed to provide accurate and concise information on a wide"
src/{distilabel_dataset_generator → synthetic_dataset_generator}/pipelines/textcat.py RENAMED
@@ -9,9 +9,9 @@ from distilabel.steps.tasks import (
9
  )
10
  from pydantic import BaseModel, Field
11
 
12
- from distilabel_dataset_generator.constants import BASE_URL, MODEL
13
- from distilabel_dataset_generator.pipelines.base import _get_next_api_key
14
- from distilabel_dataset_generator.utils import get_preprocess_labels
15
 
16
  PROMPT_CREATION_PROMPT = """You are an AI assistant specialized in generating very precise text classification tasks for dataset creation.
17
 
 
9
  )
10
  from pydantic import BaseModel, Field
11
 
12
+ from synthetic_dataset_generator.constants import BASE_URL, MODEL
13
+ from synthetic_dataset_generator.pipelines.base import _get_next_api_key
14
+ from synthetic_dataset_generator.utils import get_preprocess_labels
15
 
16
  PROMPT_CREATION_PROMPT = """You are an AI assistant specialized in generating very precise text classification tasks for dataset creation.
17
 
src/{distilabel_dataset_generator → synthetic_dataset_generator}/utils.py RENAMED
@@ -12,7 +12,7 @@ from gradio.oauth import (
12
  from huggingface_hub import whoami
13
  from jinja2 import Environment, meta
14
 
15
- from distilabel_dataset_generator.constants import argilla_client
16
 
17
 
18
  def get_duplicate_button():
 
12
  from huggingface_hub import whoami
13
  from jinja2 import Environment, meta
14
 
15
+ from synthetic_dataset_generator.constants import argilla_client
16
 
17
 
18
  def get_duplicate_button():