Saboorhsn committed on
Commit
1058a79
·
verified ·
1 Parent(s): 24b4364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -39
app.py CHANGED
@@ -6,23 +6,31 @@ import base64
6
  from datetime import datetime
7
 
8
  def load_and_combine_datasets():
9
- python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
10
- streamlit_issues_dataset = load_dataset("andfanilo/streamlit-issues", split='train')
11
- streamlit_docs_dataset = load_dataset("sai-lohith/streamlit_docs", split='train')
12
-
13
- combined_dataset = concatenate_datasets([python_codes_dataset, streamlit_issues_dataset, streamlit_docs_dataset])
14
-
15
- return combined_dataset
 
 
 
 
16
 
17
  def datetime_serializer(o):
18
  if isinstance(o, datetime):
19
  return o.strftime('%Y-%m-%d %H:%M:%S')
20
 
21
  def save_combined_dataset_as_jsonl(combined_dataset, file_path):
22
- with open(file_path, 'w', encoding='utf-8') as f:
23
- for example in combined_dataset:
24
- json.dump(example, f, ensure_ascii=False, default=datetime_serializer)
25
- f.write('\n')
 
 
 
 
26
 
27
  def main():
28
  st.title("Combined Dataset Viewer and Downloader")
@@ -30,34 +38,36 @@ def main():
30
  # Load and combine datasets
31
  combined_dataset = load_and_combine_datasets()
32
 
33
- # Display a subset of the combined dataset
34
- st.write("Subset of Combined Dataset:", combined_dataset[:10])
35
-
36
- # Take input for output dataset name
37
- output_dataset_name = st.text_input("Enter output dataset name (without extension):", "combined_dataset")
38
-
39
- # Add option to save the combined dataset as JSONL
40
- if st.button("Save Combined Dataset (JSONL)"):
41
- file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
42
- save_combined_dataset_as_jsonl(combined_dataset, file_path)
43
- st.write(f"Combined dataset saved as JSONL file: {file_path}")
44
-
45
- # Add option to download the JSONL file
46
- if st.button("Download Combined Dataset (JSONL)"):
47
- file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
48
- save_combined_dataset_as_jsonl(combined_dataset, file_path)
49
- st.write("Download the combined dataset as JSONL file:")
50
- with open(file_path, "rb") as f:
51
- bytes_data = f.read()
52
- b64 = base64.b64encode(bytes_data).decode()
53
- href = f'<a href="data:file/jsonl;base64,{b64}" download="{output_dataset_name}.jsonl">Download JSONL File</a>'
54
- st.markdown(href, unsafe_allow_html=True)
55
-
56
- # Provide download button
57
- st.download_button(label="Click to Download",
58
- data=bytes_data,
59
- file_name=f"{output_dataset_name}.jsonl",
60
- mime="application/jsonl")
 
 
61
 
62
  if __name__ == "__main__":
63
  main()
 
6
  from datetime import datetime
7
 
8
  def load_and_combine_datasets():
9
+ try:
10
+ python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
11
+ streamlit_issues_dataset = load_dataset("andfanilo/streamlit-issues", split='train')
12
+ streamlit_docs_dataset = load_dataset("sai-lohith/streamlit_docs", split='train')
13
+
14
+ combined_dataset = concatenate_datasets([python_codes_dataset, streamlit_issues_dataset, streamlit_docs_dataset])
15
+
16
+ return combined_dataset
17
+ except Exception as e:
18
+ st.error(f"Error loading datasets: {e}")
19
+ return None
20
 
21
  def datetime_serializer(o):
22
  if isinstance(o, datetime):
23
  return o.strftime('%Y-%m-%d %H:%M:%S')
24
 
25
  def save_combined_dataset_as_jsonl(combined_dataset, file_path):
26
+ try:
27
+ with open(file_path, 'w', encoding='utf-8') as f:
28
+ for example in combined_dataset:
29
+ json.dump(example, f, ensure_ascii=False, default=datetime_serializer)
30
+ f.write('\n')
31
+ st.success(f"Combined dataset saved as JSONL file: {file_path}")
32
+ except Exception as e:
33
+ st.error(f"Error saving dataset: {e}")
34
 
35
  def main():
36
  st.title("Combined Dataset Viewer and Downloader")
 
38
  # Load and combine datasets
39
  combined_dataset = load_and_combine_datasets()
40
 
41
+ if combined_dataset is not None:
42
+ # Display a subset of the combined dataset
43
+ st.write("Subset of Combined Dataset:", combined_dataset[:10])
44
+
45
+ # Take input for output dataset name
46
+ output_dataset_name = st.text_input("Enter output dataset name (without extension):", "combined_dataset")
47
+
48
+ # Add option to save the combined dataset as JSONL
49
+ if st.button("Save Combined Dataset (JSONL)"):
50
+ file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
51
+ save_combined_dataset_as_jsonl(combined_dataset, file_path)
52
+
53
+ # Add option to download the JSONL file
54
+ if st.button("Download Combined Dataset (JSONL)"):
55
+ file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")
56
+ save_combined_dataset_as_jsonl(combined_dataset, file_path)
57
+ try:
58
+ with open(file_path, "rb") as f:
59
+ bytes_data = f.read()
60
+ b64 = base64.b64encode(bytes_data).decode()
61
+ href = f'<a href="data:file/jsonl;base64,{b64}" download="{output_dataset_name}.jsonl">Download JSONL File</a>'
62
+ st.markdown(href, unsafe_allow_html=True)
63
+
64
+ # Provide download button
65
+ st.download_button(label="Click to Download",
66
+ data=bytes_data,
67
+ file_name=f"{output_dataset_name}.jsonl",
68
+ mime="application/jsonl")
69
+ except Exception as e:
70
+ st.error(f"Error preparing download: {e}")
71
 
72
  if __name__ == "__main__":
73
  main()