File size: 1,749 Bytes
c7cc986
ff82de9
0f6cf6a
c7cc986
 
 
b75c1aa
 
360bd52
c7cc986
360bd52
c7cc986
 
0f6cf6a
 
 
 
 
360bd52
c7cc986
 
b75c1aa
c7cc986
 
b75c1aa
360bd52
 
 
0f6cf6a
 
 
 
 
48aa054
18adb0d
 
 
 
 
 
 
c7cc986
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import streamlit as st
from datasets import load_dataset, concatenate_datasets
import json

def load_and_combine_datasets():
    """Load the ``train`` split of each source dataset and concatenate them.

    The datasets are loaded and concatenated in this order:
    ``flytech/python-codes-25k``, ``andfanilo/streamlit-issues``,
    ``sai-lohith/streamlit_docs``.

    Returns:
        Whatever ``concatenate_datasets`` returns for the three loaded splits.
    """
    # NOTE(review): presumably the three datasets must share compatible
    # columns for the concatenation to succeed -- confirm against their schemas.
    dataset_ids = (
        'flytech/python-codes-25k',
        "andfanilo/streamlit-issues",
        "sai-lohith/streamlit_docs",
    )
    train_splits = [
        load_dataset(dataset_id, split='train') for dataset_id in dataset_ids
    ]
    return concatenate_datasets(train_splits)

def save_combined_dataset_as_jsonl(combined_dataset, file_path):
    """Write every record of *combined_dataset* to *file_path* as JSON Lines.

    Each record is serialised to one JSON document followed by a newline.
    Non-ASCII characters are written verbatim (``ensure_ascii=False``) and the
    file is encoded as UTF-8. An existing file at *file_path* is overwritten.

    Args:
        combined_dataset: Iterable of JSON-serialisable records.
        file_path: Destination path of the ``.jsonl`` file.

    Raises:
        TypeError: If a record contains a value ``json`` cannot serialise.
            Records written before the failing one remain in the file; the
            failing record contributes no partial output.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        for example in combined_dataset:
            # Encode to a string first: json.dump() streams chunks into the
            # file, so a failure mid-record would leave a truncated line.
            line = json.dumps(example, ensure_ascii=False)
            f.write(line + '\n')

def main():
    """Render the Streamlit page for previewing and exporting the dataset.

    Shows a title, loads the combined dataset, displays its first ten rows,
    and offers two buttons: one that writes the dataset to
    ``combined_dataset.jsonl``, and one that writes the same file and then
    exposes it through a download button.
    """
    st.title("Combined Dataset Viewer")

    # Load and combine datasets
    combined_dataset = load_and_combine_datasets()

    # Display a subset of the combined dataset
    st.write("Subset of Combined Dataset:", combined_dataset[:10])

    # Both actions below target the same output file, so name it once.
    file_path = "combined_dataset.jsonl"

    # Add option to save the combined dataset as JSONL
    if st.button("Save Combined Dataset (JSONL)"):
        save_combined_dataset_as_jsonl(combined_dataset, file_path)
        st.write(f"Combined dataset saved as JSONL file: {file_path}")

    # Add option to download the JSONL file
    if st.button("Download Combined Dataset (JSONL)"):
        save_combined_dataset_as_jsonl(combined_dataset, file_path)
        st.write("Download the combined dataset as JSONL file:")
        # Read the bytes inside a context manager so the file handle is closed
        # instead of being left open after it is handed to the widget.
        with open(file_path, "rb") as jsonl_file:
            jsonl_bytes = jsonl_file.read()
        st.download_button(label="Download", data=jsonl_bytes, file_name=file_path)

# Call main() only when this module is run as the entry point, not on import.
if __name__ == "__main__":
    main()