BIRD-SQL-data

Sleeping

File size: 1,423 Bytes

f9f24ec
 
 
 
7ecbb3e
2319432
 
f9f24ec
7ecbb3e
f9f24ec
 
44421ab
f9f24ec
 
44421ab
 
 
 
 
49e9afa
44421ab
 
 
 
2bc5907
44421ab
 
 
2bc5907
44421ab
 
f9f24ec
49e9afa
8cff49c
89247af

import streamlit as st
import json
from datasets import load_dataset

# Configure the page, then render the title banner and the intro line.
st.set_page_config(
    page_title="Kaggle Notebooks inspection",
    layout="wide",
)

# Centered, colored heading; written as raw HTML, hence unsafe_allow_html.
st.markdown(
    "<h1 style='text-align: center; color: #00BFFF;'>Kaggle Notebooks inspection 🔍</h1>",
    unsafe_allow_html=True,
)

# One-line description of what the app displays.
st.markdown(
    "Here you can inspect Kaggle notebooks that were converted to python scripts and deduplicated."
)
@st.cache()
def load_data():
    """Return the ``train`` split of ``loubnabnl/subset_kaggle_scripts``.

    The function is wrapped in ``st.cache()`` so the result of
    ``load_dataset`` can be reused instead of being reloaded on every call.
    """
    # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    # favor of ``st.cache_data`` / ``st.cache_resource`` -- confirm against
    # the Streamlit version this app is pinned to before switching.
    return load_dataset("loubnabnl/subset_kaggle_scripts", split="train")

def show_extra_info(e):
    """Build the markdown/HTML summary displayed for one notebook row.

    Args:
        e: Mapping for a single row. ``e["kversion"]`` must be a JSON string
            encoding a non-empty list whose first item has ``Title`` and
            ``TotalVotes`` keys. ``e["dataset_versions"]`` is treated as
            best-effort: when present and parseable it is a JSON string
            encoding a list whose first item has ``Title`` and
            ``Description`` keys.

    Returns:
        A string with the notebook title and upvote count, followed by a
        dataset title/description section when dataset metadata is available.

    Raises:
        KeyError, IndexError, TypeError, ValueError: If ``kversion`` is
            missing or malformed. A non-empty dataset entry lacking
            ``Title`` or ``Description`` also raises ``KeyError``.
    """
    kv = json.loads(e["kversion"])[0]

    # Dataset metadata is optional. Only the errors that mean "absent or
    # unparseable" are swallowed (missing key, None value, invalid JSON,
    # empty list); anything else -- including KeyboardInterrupt and
    # SystemExit -- propagates instead of being silently hidden.
    try:
        data_v = json.loads(e["dataset_versions"])[0]
    except (KeyError, IndexError, TypeError, ValueError):
        data_v = None

    if data_v:
        data_title = data_v["Title"]
        description = data_v["Description"]
        # json.loads turns a bare NaN token into float('nan'), whose str() is
        # 'nan'; such descriptions are shown with a placeholder instead.
        if str(description) == "nan":
            description = "<empty_description>"
        data_text = f"<br>##📚 Dataset description:<br>Title: **{data_title}**, described as: {description}."
    else:
        data_text = ""

    return f"The title of the notebook is: **{kv['Title']}** and it has **{kv['TotalVotes']} ⬆️ upvotes**.{data_text}"

# Load the dataset and let the user pick one notebook by its row index.
samples = load_data()
index_example = st.number_input(
    f"Choose a sample from the existing {len(samples)} notebooks:",
    min_value=0,
    max_value=len(samples) - 1,
    value=0,
    step=1,
)

# Fetch the selected row once and reuse it for both the summary and the code
# view, instead of indexing the dataset twice.
sample = samples[index_example]

# NOTE(review): unsafe_allow_html=True renders the dataset-provided title and
# description as HTML -- confirm the dataset content is trusted, or escape it.
st.markdown(show_extra_info(sample), unsafe_allow_html=True)
st.code(sample["script"])