# Streamlit app: combine three Hugging Face datasets, preview the result, and save it to disk.
import streamlit as st
from datasets import load_dataset, concatenate_datasets
import os

def load_and_combine_datasets():
    """Load the three source datasets and return them concatenated.

    The ``train`` split of each dataset is loaded in this order:
    ``flytech/python-codes-25k``, ``andfanilo/streamlit-issues`` and
    ``sai-lohith/streamlit_docs``. The loaded splits are then passed, in
    that same order, to ``concatenate_datasets``.

    Returns:
        The object returned by ``concatenate_datasets``.
    """
    # NOTE(review): presumably the three datasets have compatible columns;
    # confirm, since they are concatenated row-wise without any remapping.
    repo_ids = (
        'flytech/python-codes-25k',
        "andfanilo/streamlit-issues",
        "sai-lohith/streamlit_docs",
    )
    train_splits = [load_dataset(repo_id, split='train') for repo_id in repo_ids]
    return concatenate_datasets(train_splits)

def save_combined_dataset(combined_dataset, dataset_name):
    """Persist *combined_dataset* and show the save location on the page.

    Args:
        combined_dataset: Object exposing a ``save_to_disk`` method.
        dataset_name: Target passed to ``save_to_disk`` and echoed to the user.
    """
    combined_dataset.save_to_disk(dataset_name)
    # Let the user know where the data was written.
    location_message = f"Dataset saved at: {dataset_name}"
    st.write(location_message)

def _directory_size_bytes(path):
    """Return the total size in bytes of the file or directory tree at *path*."""
    if os.path.isfile(path):
        return os.path.getsize(path)
    return sum(
        os.path.getsize(os.path.join(root, filename))
        for root, _dirs, filenames in os.walk(path)
        for filename in filenames
    )


def main():
    """Render the viewer page: preview the combined dataset and offer to save it.

    The page shows the first ten rows of the combined dataset, a text field
    for the output name, and two buttons. "Save Combined Dataset" writes the
    dataset under the entered name. "Download Combined Dataset" does the same
    and additionally reports the absolute path and the on-disk size in MB.
    If a button is pressed while the name field is empty, a warning is shown
    and nothing is written.
    """
    st.title("Combined Dataset Viewer")

    # Load and combine datasets
    combined_dataset = load_and_combine_datasets()

    # Display a subset of the combined dataset
    st.write("Subset of Combined Dataset:", combined_dataset[:10])

    # The name field must live outside the button branches: st.button is only
    # True on the single rerun triggered by the click, so an input created
    # inside that branch disappears as soon as the user types into it and the
    # save would never run. One shared field also avoids two identical widgets.
    dataset_name = st.text_input("Enter a name for the combined dataset:")

    # Add option to save the combined dataset
    if st.button("Save Combined Dataset"):
        if dataset_name:
            save_combined_dataset(combined_dataset, dataset_name)
            st.success(f"Combined dataset saved as '{dataset_name}'!")
        else:
            st.warning("Please enter a name for the combined dataset first.")

    # Add option to download the combined dataset
    if st.button("Download Combined Dataset"):
        if dataset_name:
            save_combined_dataset(combined_dataset, dataset_name)
            filepath = os.path.join(os.getcwd(), dataset_name)
            # The saved dataset is a directory tree, so sum the contained
            # files instead of asking for the size of the directory entry.
            filesize = _directory_size_bytes(filepath) / (1024 * 1024)  # Size in MB
            # NOTE(review): this link targets a path on the machine running
            # the app; it is not a browser download of the data.
            st.write(f"Download the combined dataset: [{dataset_name}]({filepath})")
            st.write(f"File Size: {filesize:.2f} MB")
        else:
            st.warning("Please enter a name for the combined dataset first.")

# Entry point: call main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()