Spaces:

and-effect
/

Musterdatenkatalog

Running

App Files Files Community

Rahkakavee Baskaran committed on May 14, 2023

Commit

4547220

1 Parent(s): 3592072

add app

Browse files

Files changed (1) hide show

app.py +113 -13

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import json
 from itertools import islice
 from typing import Generator
 from plotly import express as px
 def chunks(data: dict, size=13) -> Generator:
@@ -55,12 +58,11 @@ def load_json(path: str) -> dict:
 # Load Data
 data = load_json("data.json")
 taxonomy = load_json("taxonomy_processed_v3.json")
 theme_counts = dict(Counter([el["THEMA"] for el in data]))
 labels_counts = dict(Counter([el["BEZEICHNUNG"] for el in data]))
-taxonomy = taxonomy
 names = [""]
 parents = ["Musterdatenkatalog"]
@@ -79,17 +81,6 @@ parents, name, values = get_tree_map_data(
     root="Musterdatenkatalog",
 )
-# fig = go.Figure(
-#     go.Treemap(
-#         labels=name,
-#         parents=parents,
-#         root_color="white",
-#         values=values,
-#         # textinfo="label+value",
-#     ),
-# )
 fig = px.treemap(
     names=name,
     parents=parents,
@@ -103,6 +94,115 @@ fig.update_layout(
 )
 st.title("Musterdatenkatalog")
 st.plotly_chart(fig)

 from itertools import islice
 from typing import Generator
 from plotly import express as px
+from safetensors import safe_open
+from semantic_search import predict
+from sentence_transformers import SentenceTransformer
 def chunks(data: dict, size=13) -> Generator:
 # Load Data
 data = load_json("data.json")
 taxonomy = load_json("taxonomy_processed_v3.json")
+taxonomy_labels = [el["group"] + " - " + el["label"] for el in taxonomy]
 theme_counts = dict(Counter([el["THEMA"] for el in data]))
 labels_counts = dict(Counter([el["BEZEICHNUNG"] for el in data]))
 names = [""]
 parents = ["Musterdatenkatalog"]
     root="Musterdatenkatalog",
 )
 fig = px.treemap(
     names=name,
     parents=parents,
 )
+tensors = {}
+with safe_open("corpus_embeddings.pt", framework="pt", device="cpu") as f:
+    for k in f.keys():
+        tensors[k] = f.get_tensor(k)
+model = SentenceTransformer(
+    model_name_or_path="and-effect/musterdatenkatalog_clf",
+    device="cpu",
+    use_auth_token=True,
+)
+st.set_page_config(layout="wide")
 st.title("Musterdatenkatalog")
+col1, col2, col3 = st.columns(3)
+col1.metric("Kommunale Datensätze", len(data))
+col2.metric("Themen", len(theme_counts))
+col3.metric("Bezeichnungen", len(labels_counts))
+st.title("Taxonomy")
 st.plotly_chart(fig)
+st.title("Predict a Dataset")
+# create two columns and make left column wider
+# st.markdown(
+#     """
+# <style>
+#     div[data-testid="stVerticalBlock"] div[style*="flex-direction: column;"] div[data-testid="stVerticalBlock"] {
+#         border-radius: 15px;
+#         background-color: white;
+#         box-shadow: 0 0 10px #eee;
+#         border: 1px solid #ddd;
+#         padding: 1rem;;
+#     }
+# </style>
+# """,
+#     unsafe_allow_html=True,
+# )
+st.markdown(
+    """
+<style>
+/* Style columns */
+[data-testid="column"] {
+      border-radius: 15px;
+         background-color: white;
+         box-shadow: 0 0 10px #eee;
+         border: 1px solid #ddd;
+         padding: 1rem;;
+}
+/* Style containers */
+[data-testid="stVerticalBlock"] > [style*="flex-direction: column;"] > [data-testid="stVerticalBlock"] {
+      border-radius: 15px;
+         background-color: white;
+         box-shadow: 0 0 10px #eee;
+         border: 1px solid #ddd;
+         padding: 1rem;;
+}
+</style>
+""",
+    unsafe_allow_html=True,
+)
+col1, col2 = st.columns([1.2, 1])
+with col2:
+    st.subheader("Example Datasets")
+    examples = [
+        "Spielplätze",
+        "Berliner Weihnachtsmärkte 2022",
+        "Hochschulwechslerquoten zum Masterstudium nach Bundesländern",
+        "Umringe der Bebauungspläne von Etgert",
+    ]
+    for example in examples:
+        if st.button(example):
+            if "key" not in st.session_state:
+                st.session_state["query"] = example
+with col1:
+    if "query" not in st.session_state:
+        query = st.text_input(
+            "Enter dataset name",
+        )
+    if "query" in st.session_state and st.session_state.query in examples:
+        query = st.text_input("Enter dataset name", value=st.session_state.query)
+    if "query" in st.session_state and st.session_state.query not in examples:
+        del st.session_state["query"]
+        query = st.text_input("Enter dataset name")
+    top_k = st.select_slider("Top Results", options=[1, 2, 3, 4, 5], value=1)
+    predictions = predict(
+        query=query,
+        corpus_embeddings=tensors["corpus_embeddings"],
+        corpus_labels=taxonomy_labels,
+        top_k=top_k,
+        model=model,
+    )
+    if st.button("Predict"):
+        for prediction in predictions:
+            st.write(prediction)