BIRD-SQL-data

Sleeping

File size: 1,616 Bytes

f9f24ec
 
 
 
7ecbb3e
2319432
 
f9f24ec
7ecbb3e
f9f24ec
0974549
44421ab
0974549
f9f24ec
 
44421ab
 
 
 
 
49e9afa
44421ab
 
 
e16e691
c1b9076
44421ab
 
 
2bc5907
44421ab
 
0974549
 
 
 
 
8cff49c
89247af

import streamlit as st
import json
from datasets import load_dataset

# Page-level configuration comes before any other Streamlit call in this script.
st.set_page_config(
    page_title="Kaggle Notebooks inspection",
    layout="wide",
)

# Centered, colored page heading (raw HTML, hence unsafe_allow_html).
st.markdown(
    "<h1 style='text-align: center; color: #00BFFF;'>Kaggle Notebooks inspection 🔍</h1>",
    unsafe_allow_html=True,
)

# Short description of what the app displays.
st.markdown(
    "Here you can inspect Kaggle notebooks that were converted to python scripts and deduplicated."
)
@st.cache()
def load_data(upvote=0):
    """Load the Kaggle scripts subset, keeping rows with enough upvotes.

    Args:
        upvote: Minimum value of a row's ``upvotes`` field for it to be kept.

    Returns:
        The ``train`` split of ``loubnabnl/subset_kaggle_scripts`` filtered
        to rows whose ``upvotes`` is at least ``upvote``.
    """
    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_data / st.cache_resource -- confirm the pinned
    # Streamlit version before switching decorators.

    def has_enough_upvotes(row):
        return row["upvotes"] >= upvote

    notebooks = load_dataset("loubnabnl/subset_kaggle_scripts", split="train")
    return notebooks.filter(has_enough_upvotes)

def show_extra_info(e):
    """Build the markdown summary displayed above a notebook's code.

    Args:
        e: A dataset row (mapping). ``e["kversion"]`` must be a JSON-encoded
            list whose first element has ``Title`` and ``TotalVotes`` keys.
            ``e["dataset_versions"]`` is optional; when usable it is a
            JSON-encoded list whose first element has ``Title`` and
            ``Description`` keys.

    Returns:
        A markdown/HTML string with the notebook title and upvote count,
        followed by the dataset title and description when dataset metadata
        is available.

    Raises:
        KeyError, IndexError, TypeError, ValueError: If ``kversion`` is
            missing or malformed (it is required, unlike the dataset info).
    """
    kv = json.loads(e["kversion"])[0]

    # Dataset metadata is best-effort: a missing column, a non-string value,
    # invalid JSON or an empty list all mean "no dataset info to show".
    # Only those failures are caught so unrelated errors are not hidden.
    try:
        data_v = json.loads(e["dataset_versions"])[0]
    except (KeyError, IndexError, TypeError, ValueError):
        data_v = None

    data_text = ""
    if data_v:
        data_title = data_v["Title"]
        description = data_v["Description"]
        # A missing description arrives as JSON NaN (stringifies to "nan")
        # or as JSON null; both are replaced by the placeholder.
        if description is None or str(description) == "nan":
            description = "<empty_description>"
        data_text = f"<br>**📚 Dataset description:**<br>Title: **{data_title}**, described as: {description}."

    return f"The title of the notebook is: **{kv['Title']}** and it has **{kv['TotalVotes']} ⬆️ upvotes**.{data_text}"


# Sidebar controls: minimum-upvote filter, then a picker for which of the
# remaining notebooks to display.
vote = st.sidebar.slider("Minimum notebook ⬆️ upvotes", min_value=0, max_value=100, step=1, value=0)
samples = load_data(vote)
index_example = st.sidebar.number_input(f"Choose a sample from the existing {len(samples)} notebooks:", min_value=0, max_value=max(0, len(samples)-1), value=0, step=1)

if len(samples) == 0:
    # A high upvote threshold can filter out every notebook; indexing the
    # empty dataset would raise IndexError, so show a message instead.
    st.warning("No notebooks match the selected minimum upvotes. Lower the threshold in the sidebar.")
else:
    # Fetch the selected row once and reuse it for both the summary and code.
    sample = samples[index_example]
    st.markdown(show_extra_info(sample), unsafe_allow_html=True)
    st.code(sample["script"])