File size: 3,400 Bytes
93db903
 
 
 
 
 
 
 
 
 
 
 
bd8fe60
 
 
 
 
 
 
93db903
 
 
 
 
 
bd8fe60
 
 
 
 
 
 
 
93db903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd8fe60
93db903
 
 
 
 
 
 
 
 
 
bd8fe60
93db903
 
bd8fe60
 
 
93db903
 
 
 
 
 
 
bd8fe60
 
 
 
 
 
93db903
 
 
 
bd8fe60
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import re
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st

st.set_page_config(layout="wide")
DATA_FILE = "data/gwf_2017-2021_specter2_base.json"
THEMES = {"cluster": "fall", "year": "mint", "source": "phase"}


def decorated_to_string(list_of_things):
    """Join a sequence of strings into an English-style enumeration.

    Up to two items are joined with " and ". Three or more items are
    comma-separated, with ", and " placed before the final item. An empty
    sequence yields an empty string.
    """
    # Short sequences need no commas at all.
    if len(list_of_things) <= 2:
        return " and ".join(list_of_things)

    leading, final = list_of_things[:-1], list_of_things[-1]
    return ", ".join(leading) + ", and " + final


def load_df(data_file: os.PathLike):
    """Read the records-oriented JSON export and prepare it for plotting.

    Each record is expected to provide ``point2d``, ``year`` and ``authors``.
    The returned frame has:

    * ``x`` / ``y`` taken from the first two entries of ``point2d``
      (``point2d`` itself is dropped),
    * ``year`` as int, with empty-string years mapped to 0,
    * ``authors_trimmed``: a display string built from at most the first
      five authors, with " et al." appended when there are more,
    * ``type`` copied from ``publication_type`` when that column exists
      (the original column is then dropped).
    """

    def _given_name_first(name):
        # "Last, First" -> "First Last"; names without a comma pass through.
        if "," not in name:
            return name
        family, _, given = name.partition(",")
        return given.strip() + " " + family.strip()

    def _byline(names):
        reordered = [_given_name_first(name) for name in names]
        suffix = " et al." if len(reordered) > 5 else ""
        return decorated_to_string(reordered[:5]) + suffix

    frame = pd.read_json(data_file, orient="records")

    # Split the 2-D point into separate plot coordinates.
    frame["x"] = frame["point2d"].apply(lambda point: point[0])
    frame["y"] = frame["point2d"].apply(lambda point: point[1])

    # Empty-string years become 0 so the column can be cast to int.
    frame["year"] = frame["year"].replace("", 0).astype(int)

    frame["authors_trimmed"] = frame.authors.apply(_byline)

    # Columns that are no longer needed once the derived ones exist.
    obsolete = ["point2d"]
    if "publication_type" in frame.columns:
        frame["type"] = frame["publication_type"]
        obsolete.append("publication_type")
    return frame.drop(columns=obsolete)


@st.cache_data
def load_dataframe():
    """Return the frame built by ``load_df`` from ``DATA_FILE``.

    Wrapped in ``st.cache_data`` so the load goes through Streamlit's
    data cache.
    """
    frame = load_df(DATA_FILE)
    return frame


# Every point starts dimmed; rows selected by the sidebar filters are raised
# to full opacity further down.
DF = load_dataframe()
DF["opacity"] = 0.04

# Year 0 is left out when deriving the slider bounds (load_df maps
# empty-string years to 0).
known_years = DF.loc[DF["year"] > 0, "year"]
min_year, max_year = known_years.min(), known_years.max()

with st.sidebar:
    # Filter widgets. select_slider options are strings, so the chosen bounds
    # are converted back to int before being compared with the "year" column.
    start_year, end_year = st.select_slider(
        "Publication year",
        options=[str(y) for y in range(min_year, max_year + 1)],
        value=(str(min_year), str(max_year)),
    )
    src = st.text_input("Source")

    author_names = st.text_input("Author names (separated by comma)")

    title = st.text_input("Title")

    start_year = int(start_year)
    end_year = int(end_year)

    # All active filters are AND-ed into one boolean row mask.
    df_mask = (DF["year"] >= start_year) & (DF["year"] <= end_year)

    if src:
        # Case-insensitive substring match on the source.
        df_mask = df_mask & DF.source.apply(lambda x: src.lower() in x.lower())

    if author_names:
        # Ignore empty fragments produced by stray or trailing commas.
        authors = [a.strip() for a in author_names.split(",") if a.strip()]
        # The typed text is matched literally: escaping it keeps characters
        # such as "(", "+" or "." from being read as regex syntax, which
        # previously could raise re.error or match unintended names.
        # Patterns are compiled once rather than once per row.
        author_patterns = [re.compile(re.escape(a), re.IGNORECASE) for a in authors]
        # A row matches when every requested name occurs in at least one of
        # its author strings.
        author_mask = DF.authors.apply(
            lambda row: all(any(p.search(x) for x in row) for p in author_patterns)
        )
        df_mask = df_mask & author_mask

    if title:
        # Case-insensitive substring match on the title.
        df_mask = df_mask & DF.title.apply(lambda x: title.lower() in x.lower())

    # Matching rows are drawn fully opaque; the rest keep their dimmed value.
    DF.loc[df_mask, "opacity"] = 1.0
    st.write(f"Number of points: {DF[df_mask].shape[0]}")

    # Column used to colour the scatter plot; each option is a key of THEMES.
    color = st.selectbox("Color", ("cluster", "year", "source"))


# Hover text: title in bold, then authors, year, source and keywords, each on
# its own line. The indices refer to the custom_data columns passed below.
hover_template = (
    "<b>%{customdata[0]}</b><br>"
    "%{customdata[1]}<br>"
    "%{customdata[2]}<br>"
    "<i>%{customdata[3]}</i><br>"
    "<i font-size='8'>Keywords: %{customdata[4]}</i>"
)

# NOTE(review): `opacity` is given the whole-frame Series. If Plotly Express
# splits the data into one trace per category (e.g. when the colour column
# holds strings), each trace may receive the full-length array and the
# per-point opacity could be misaligned -- verify with color="source".
fig = px.scatter(
    DF,
    x="x",
    y="y",
    color=color,
    color_continuous_scale=THEMES[color],
    opacity=DF["opacity"],
    custom_data=("title", "authors_trimmed", "year", "source", "keywords"),
    width=1000,
    height=800,
)
fig.update_traces(hovertemplate=hover_template)

base_font = dict(family="Times New Roman", size=30)
hover_label = dict(
    align="left",
    font_size=14,
    font_family="Rockwell",
    namelength=-1,
)
fig.update_layout(showlegend=False, font=base_font, hoverlabel=hover_label)

# Both axes are left without a title.
fig.update_xaxes(title="")
fig.update_yaxes(title="")

st.plotly_chart(fig, use_container_width=True)