Spaces:
Sleeping
Sleeping
File size: 2,464 Bytes
c7cc986 ff82de9 0f6cf6a 30585ac 08e2876 c7cc986 b75c1aa 360bd52 c7cc986 360bd52 c7cc986 0f6cf6a 360bd52 c7cc986 30585ac b75c1aa c7cc986 b75c1aa 360bd52 30585ac 0f6cf6a 30585ac 0f6cf6a 48aa054 18adb0d 30585ac 18adb0d 08e2876 30585ac 08e2876 18adb0d c7cc986 308ef5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import streamlit as st
from datasets import load_dataset, concatenate_datasets
import json
import os
import base64
def load_and_combine_datasets():
    """Load the three source datasets and return them concatenated.

    The ``train`` split of each Hugging Face dataset listed below is loaded
    with ``load_dataset`` and the results are joined, in this order, with
    ``concatenate_datasets``.

    Returns:
        The combined dataset produced by ``concatenate_datasets``.
    """
    # Order matters: rows appear in the combined dataset in this sequence.
    source_repos = (
        'flytech/python-codes-25k',
        "andfanilo/streamlit-issues",
        "sai-lohith/streamlit_docs",
    )
    # NOTE(review): row-wise concatenation presumably needs the datasets to
    # share a compatible column schema -- confirm these three actually do.
    loaded_parts = [load_dataset(repo_id, split='train') for repo_id in source_repos]
    return concatenate_datasets(loaded_parts)
def save_combined_dataset_as_jsonl(combined_dataset, file_path: str) -> None:
    """Write every example of *combined_dataset* to *file_path* as JSON Lines.

    Args:
        combined_dataset: Iterable of JSON-serialisable examples (one per
            output line).
        file_path: Destination path; an existing file is overwritten.

    The file is written as UTF-8 and non-ASCII characters are emitted
    verbatim (``ensure_ascii=False``) rather than as ``\\uXXXX`` escapes.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        for example in combined_dataset:
            # Serialise the whole record first so a failure on one example
            # cannot leave a half-written line in the output file.
            f.write(json.dumps(example, ensure_ascii=False) + '\n')
def _safe_dataset_stem(raw_name, fallback="combined_dataset"):
    """Reduce a user-supplied dataset name to a bare, safe file stem."""
    # Drop directory components so the output can only land in the CWD.
    stem = os.path.basename(raw_name.strip().replace("\\", "/"))
    # Keep a conservative character set: the stem is also interpolated into
    # an HTML attribute below, so quotes and angle brackets must not survive.
    stem = "".join(ch for ch in stem if ch.isalnum() or ch in "-_.").strip(".")
    return stem or fallback


def main():
    """Render the Streamlit page: preview, save, and download the dataset.

    The page loads the combined dataset, shows its first ten rows, asks for
    an output name, and offers two buttons: one that writes
    ``<name>.jsonl`` into the current working directory, and one that writes
    the same file and then exposes it through an HTML link and a Streamlit
    download button.
    """
    st.title("Combined Dataset Viewer and Downloader")

    # Load and combine datasets
    combined_dataset = load_and_combine_datasets()

    # Display a subset of the combined dataset
    st.write("Subset of Combined Dataset:", combined_dataset[:10])

    # Take input for output dataset name. The raw text is untrusted, so it is
    # sanitised once here and only the sanitised stem is used afterwards.
    requested_name = st.text_input("Enter output dataset name (without extension):", "combined_dataset")
    output_dataset_name = _safe_dataset_stem(requested_name)
    file_path = os.path.join(os.getcwd(), f"{output_dataset_name}.jsonl")

    # Add option to save the combined dataset as JSONL
    if st.button("Save Combined Dataset (JSONL)"):
        save_combined_dataset_as_jsonl(combined_dataset, file_path)
        st.write(f"Combined dataset saved as JSONL file: {file_path}")

    # Add option to download the JSONL file
    if st.button("Download Combined Dataset (JSONL)"):
        save_combined_dataset_as_jsonl(combined_dataset, file_path)
        st.write("Download the combined dataset as JSONL file:")
        # Read raw bytes: the file was written as UTF-8, so decoding it with
        # the platform default encoding and re-encoding could corrupt it.
        with open(file_path, "rb") as f:
            bytes_data = f.read()
        b64 = base64.b64encode(bytes_data).decode()
        href = f'<a href="data:file/jsonl;base64,{b64}" download="{output_dataset_name}.jsonl">Download JSONL File</a>'
        st.markdown(href, unsafe_allow_html=True)
        # Provide download button
        st.download_button(
            label="Click to Download",
            data=bytes_data,
            file_name=f"{output_dataset_name}.jsonl",
            mime="application/jsonl",
        )
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()