Saboorhsn committed on
Commit
0f6cf6a
·
verified ·
1 Parent(s): 48aa054

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  from datasets import load_dataset, concatenate_datasets
3
- import os
4
 
5
  def load_and_combine_datasets():
6
  python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
@@ -11,10 +11,11 @@ def load_and_combine_datasets():
11
 
12
  return combined_dataset
13
 
14
- def save_combined_dataset(combined_dataset, dataset_name):
15
- combined_dataset.save_to_disk(dataset_name)
16
- # Print the location where the dataset is saved
17
- st.write(f"Dataset saved at: {dataset_name}")
 
18
 
19
  def main():
20
  st.title("Combined Dataset Viewer")
@@ -25,22 +26,11 @@ def main():
25
  # Display a subset of the combined dataset
26
  st.write("Subset of Combined Dataset:", combined_dataset[:10])
27
 
28
- # Add option to save the combined dataset
29
- if st.button("Save Combined Dataset"):
30
- dataset_name = st.text_input("Enter a name for the combined dataset:")
31
- if dataset_name:
32
- save_combined_dataset(combined_dataset, dataset_name)
33
- st.success(f"Combined dataset saved as '{dataset_name}'!")
34
-
35
- # Add option to download the combined dataset
36
- if st.button("Download Combined Dataset"):
37
- dataset_name = st.text_input("Enter a name for the combined dataset:")
38
- if dataset_name:
39
- save_combined_dataset(combined_dataset, dataset_name)
40
- filepath = os.path.join(os.getcwd(), dataset_name)
41
- filesize = os.path.getsize(filepath) / (1024 * 1024) # Size in MB
42
- st.write(f"Download the combined dataset: [{dataset_name}]({filepath})")
43
- st.write(f"File Size: {filesize:.2f} MB")
44
 
45
  if __name__ == "__main__":
46
  main()
 
1
  import streamlit as st
2
  from datasets import load_dataset, concatenate_datasets
3
+ import json
4
 
5
  def load_and_combine_datasets():
6
  python_codes_dataset = load_dataset('flytech/python-codes-25k', split='train')
 
11
 
12
  return combined_dataset
13
 
14
+ def save_combined_dataset_as_jsonl(combined_dataset, file_path):
15
+ with open(file_path, 'w', encoding='utf-8') as f:
16
+ for example in combined_dataset:
17
+ json.dump(example, f, ensure_ascii=False)
18
+ f.write('\n')
19
 
20
  def main():
21
  st.title("Combined Dataset Viewer")
 
26
  # Display a subset of the combined dataset
27
  st.write("Subset of Combined Dataset:", combined_dataset[:10])
28
 
29
+ # Add option to save the combined dataset as JSONL
30
+ if st.button("Save Combined Dataset (JSONL)"):
31
+ file_path = "combined_dataset.jsonl"
32
+ save_combined_dataset_as_jsonl(combined_dataset, file_path)
33
+ st.write(f"Combined dataset saved as JSONL file: {file_path}")
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  if __name__ == "__main__":
36
  main()