lewtun HF staff committed on
Commit
51fe19a
1 Parent(s): 98f123b

Switch endpoint & use buffer for upload

Browse files
Files changed (1) hide show
  1. app.py +20 -30
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import datetime
2
- import json
3
  import os
4
  import re
5
- import shutil
6
 
7
  import gradio as gr
8
- from huggingface_hub import Repository
 
9
  from text_generation import Client
10
 
11
  from dialogues import DialogueTemplate
@@ -14,43 +14,33 @@ from share_btn import (community_icon_html, loading_icon_html, share_btn_css,
14
 
15
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
16
  API_TOKEN = os.environ.get("API_TOKEN", None)
 
17
 
18
  model2endpoint = {
19
  "starchat-alpha": "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1",
20
- "starchat-beta": "https://api-inference.huggingface.co/models/HuggingFaceH4/starchat-beta",
21
  }
22
  model_names = list(model2endpoint.keys())
23
 
24
- repo = None
25
- if HF_TOKEN:
26
- try:
27
- shutil.rmtree("./data/")
28
- except:
29
- pass
30
 
31
- repo = Repository(
32
- local_dir="./data/",
33
- clone_from="HuggingFaceH4/starchat_playground_dialogues",
34
- use_auth_token=HF_TOKEN,
 
 
 
 
 
 
 
 
 
35
  repo_type="dataset",
36
  )
37
- repo.git_pull()
38
-
39
 
40
- def save_inputs_and_outputs(now, inputs, outputs, generate_kwargs, model):
41
- current_hour = now.strftime("%Y-%m-%d_%H")
42
- file_name = f"prompts_{current_hour}.jsonl"
43
-
44
- if repo is not None:
45
- repo.git_pull(rebase=True)
46
- with open(os.path.join("data", file_name), "a", encoding="utf-8") as f:
47
- json.dump(
48
- {"model": model, "inputs": inputs, "outputs": outputs, "generate_kwargs": generate_kwargs},
49
- f,
50
- ensure_ascii=False,
51
- )
52
- f.write("\n")
53
- repo.push_to_hub()
54
 
55
 
56
  def get_total_inputs(inputs, chatbot, preprompt, user_name, assistant_name, sep):
 
1
  import datetime
 
2
  import os
3
  import re
4
+ from io import StringIO
5
 
6
  import gradio as gr
7
+ import pandas as pd
8
+ from huggingface_hub import upload_file
9
  from text_generation import Client
10
 
11
  from dialogues import DialogueTemplate
 
14
 
15
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
16
  API_TOKEN = os.environ.get("API_TOKEN", None)
17
+ DIALOGUES_DATASET = "HuggingFaceH4/starchat_playground_dialogues"
18
 
19
  model2endpoint = {
20
  "starchat-alpha": "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1",
21
+ "starchat-beta": "https://ddimh86h0wqthbhy.us-east-1.aws.endpoints.huggingface.cloud",
22
  }
23
  model_names = list(model2endpoint.keys())
24
 
 
 
 
 
 
 
25
 
26
+ def save_inputs_and_outputs(now, inputs, outputs, generate_kwargs, model):
27
+ buffer = StringIO()
28
+ timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
29
+ file_name = f"prompts_{timestamp}.jsonl"
30
+ data = {"model": model, "inputs": inputs, "outputs": outputs, "generate_kwargs": generate_kwargs}
31
+ pd.DataFrame([data]).to_json(buffer, orient="records", lines=True)
32
+
33
+ # Push to Hub
34
+ upload_file(
35
+ path_in_repo=f"{now.date()}/{now.hour}/{file_name}",
36
+ path_or_fileobj=buffer.getvalue().encode(),
37
+ repo_id=DIALOGUES_DATASET,
38
+ token=HF_TOKEN,
39
  repo_type="dataset",
40
  )
 
 
41
 
42
+ # Clean and rerun
43
+ buffer.close()
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
 
46
  def get_total_inputs(inputs, chatbot, preprompt, user_name, assistant_name, sep):