the-stack-inspection

Sleeping

App Files Files Community

loubnabnl HF Staff committed on Feb 13, 2023

Commit

66a3725

1 Parent(s): b10125a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -19

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ st.title("The Stack data Inspection")
 df = pd.read_csv("extension_distribution.csv")
 all_extensions = df["extension"].tolist()
 tags = {}
 for index, row in df.iterrows():
     if row["language"] not in tags:
@@ -18,26 +19,32 @@ all_languages = list(tags.keys())
 @st.cache()
 def load_data(language, ext):
-    ds = load_dataset("loubnabnl/the-stack-inspection-data", data_dir=f"data/{language}/{ext}", split="train")
     return ds
-col1, col2, col3 = st.columns([1, 1, 4])
 with col1:
     chosen_language = st.selectbox(
-    label="Select a programming language",
-    options=all_languages,
-    index=0)
 with col2:
     chosen_ext = st.selectbox(
-    label="Select an extension",
-    options=tags[chosen_language],
-    index=0)
 samples = load_data(chosen_language, chosen_ext)
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
-not_lexed = samples.filter(lambda x: not x['lexable'])
-indexes_not_lexed = not_lexed['idx']
 # info about extension
 st.markdown("### Information about the extension:")
@@ -46,23 +53,30 @@ text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == cho
 are not lexable. These files are at indexes: {indexes_not_lexed}."
 st.markdown(text)
-col_1, col_2 = st.columns([2, 4])
 with col_1:
-    index_example = st.number_input(f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:", min_value=0, max_value=max_docs-1, value=0, step=1)
 # info about the chosen example
 example = samples[index_example]
 st.markdown("#### Information about the chosen example:")
-text_alpha = "**has**" if example['long_lines'] else "doesn't have"
-text_lines = "**has**" if example['low_alphanum'] else "doesn't have"
-text_lexer = "is" if example['lexable'] else "**isn't**"
-st.markdown(f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
-    {text_lines} very long lines,  and {text_lexer} lexable.")
 st.markdown("#### File content:")
 st.code(example["content"], language=chosen_language)

 df = pd.read_csv("extension_distribution.csv")
 all_extensions = df["extension"].tolist()
 tags = {}
 for index, row in df.iterrows():
     if row["language"] not in tags:
 @st.cache()
 def load_data(language, ext):
+    ds = load_dataset(
+        "loubnabnl/the-stack-inspection-data",
+        data_dir=f"data/{language}/{ext}",
+        split="train",
+    )
     return ds
+col1, col2, _ = st.columns([1, 1, 4])
 with col1:
     chosen_language = st.selectbox(
+        label="Select a programming language", options=all_languages, index=0
+    )
 with col2:
     chosen_ext = st.selectbox(
+        label="Select an extension", options=tags[chosen_language], index=0
+    )
+# load the dataset and get indexes of non lexable files
 samples = load_data(chosen_language, chosen_ext)
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
+not_lexed = samples.filter(lambda x: not x["lexable"])
+indexes_not_lexed = not_lexed["idx"]
 # info about extension
 st.markdown("### Information about the extension:")
 are not lexable. These files are at indexes: {indexes_not_lexed}."
 st.markdown(text)
+col_1, _ = st.columns([2, 4])
 with col_1:
+    index_example = st.number_input(
+        f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
+        min_value=0,
+        max_value=max_docs - 1,
+        value=0,
+        step=1,
+    )
 # info about the chosen example
 example = samples[index_example]
 st.markdown("#### Information about the chosen example:")
+text_alpha = "**has**" if example["long_lines"] else "doesn't have"
+text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
+text_lexer = "is" if example["lexable"] else "**isn't**"
+st.markdown(
+    f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
+    {text_lines} very long lines,  and {text_lexer} lexable."
+)
+# display file content
 st.markdown("#### File content:")
 st.code(example["content"], language=chosen_language)