File size: 3,393 Bytes
93db903
 
 
 
 
 
 
 
 
 
 
 
2ea702b
 
 
 
 
bd8fe60
2ea702b
bd8fe60
 
93db903
 
 
 
 
 
bd8fe60
 
2ea702b
 
 
bd8fe60
 
93db903
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd8fe60
93db903
 
 
 
 
 
 
 
 
 
bd8fe60
93db903
 
bd8fe60
 
 
93db903
 
 
 
 
 
 
bd8fe60
 
 
 
 
 
93db903
 
 
 
2ea702b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import re
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st

# Lay the app out across the full page width.
st.set_page_config(layout="wide")

# JSON file with the paper records that load_dataframe() reads.
DATA_FILE = "data/gwf_2017-2021_specter2_base.json"

# Continuous colour scale passed to Plotly for each selectable "Color" option.
THEMES = {
    "cluster": "fall",
    "year": "mint",
    "source": "phase",
}


def to_string_authors(list_of_authors):
    """Join author names into a single display string.

    * more than six names: the first six, comma-separated, then ", et al."
    * three to six names: comma-separated with ", and " before the last one
    * two or fewer names: joined with " and " (an empty list gives "")
    """
    count = len(list_of_authors)
    if count <= 2:
        return " and ".join(list_of_authors)
    if count <= 6:
        leading = list_of_authors[:-1]
        final = list_of_authors[-1]
        return ", ".join(leading) + ", and " + final
    return ", ".join(list_of_authors[:6]) + ", et al."


def load_df(data_file: os.PathLike) -> pd.DataFrame:
    """Load the paper records and derive the columns used for plotting.

    Args:
        data_file: Path to a JSON file in pandas "records" orientation. Each
            record must provide ``point2d`` (indexable: x first, then y),
            ``year`` and ``authors`` (an iterable of name strings).

    Returns:
        The loaded frame with ``x`` and ``y`` split out of ``point2d``,
        ``year`` as an integer (0 when blank, null or not numeric),
        ``authors_trimmed`` holding a display string built by
        ``to_string_authors``, and ``publication_type`` (when present)
        copied to ``type``. ``point2d`` and ``publication_type`` are dropped.
    """

    def first_name_first(name):
        # "Last, First" -> "First Last"; names without a comma pass through.
        if "," not in name:
            return name
        last, _, rest = name.partition(",")
        return rest.strip() + " " + last.strip()

    df = pd.read_json(data_file, orient="records")
    df["x"] = df["point2d"].apply(lambda point: point[0])
    df["y"] = df["point2d"].apply(lambda point: point[1])

    # Coerce instead of replace("", 0) + astype(int): a null or non-numeric
    # year would otherwise raise. Everything unparseable becomes 0.
    df["year"] = pd.to_numeric(df["year"], errors="coerce").fillna(0).astype(int)

    df["authors_trimmed"] = df["authors"].apply(
        lambda row: to_string_authors([first_name_first(name) for name in row])
    )

    obsolete = ["point2d"]
    if "publication_type" in df.columns:
        df["type"] = df["publication_type"]
        obsolete.append("publication_type")
    return df.drop(columns=obsolete)


@st.cache_data
def load_dataframe():
    """Return the frame that ``load_df`` builds from ``DATA_FILE``.

    Decorated with ``st.cache_data`` so the parsed frame can be reused
    instead of re-reading the file.
    """
    return load_df(data_file=DATA_FILE)


# Every point starts out nearly transparent; rows that pass the sidebar
# filters are raised to full opacity further down.
DF = load_dataframe()
DF["opacity"] = 0.04

# Rows whose year is 0 are left out when computing the slider bounds.
known_years = DF.loc[DF["year"] > 0, "year"]
min_year = known_years.min()
max_year = known_years.max()

with st.sidebar:
    # The slider options are strings, so the selection is converted back to
    # int before it is compared with the "year" column.
    start_year, end_year = st.select_slider(
        "Publication year",
        options=[str(y) for y in range(min_year, max_year + 1)],
        value=(str(min_year), str(max_year)),
    )
    src = st.text_input("Source")

    author_names = st.text_input("Author names (separated by comma)")

    title = st.text_input("Title")

    start_year = int(start_year)
    end_year = int(end_year)

    # Start from the year window, then AND in every non-empty text filter.
    df_mask = (DF["year"] >= start_year) & (DF["year"] <= end_year)

    if src:
        src_query = src.lower()
        df_mask = df_mask & DF.source.apply(lambda x: src_query in x.lower())

    if author_names:
        # Each comma-separated fragment must occur, case-insensitively, in at
        # least one author of the paper. Fragments are escaped so characters
        # such as "(" or "*" are matched literally instead of raising
        # re.error or being read as regex syntax. Empty fragments (e.g. from
        # a trailing comma) are skipped.
        authors = [a.strip() for a in author_names.split(",") if a.strip()]
        author_patterns = [re.compile(re.escape(a), re.IGNORECASE) for a in authors]
        author_mask = DF.authors.apply(
            lambda row: all(any(p.search(x) for x in row) for p in author_patterns)
        )
        df_mask = df_mask & author_mask

    if title:
        title_query = title.lower()
        df_mask = df_mask & DF.title.apply(lambda x: title_query in x.lower())

    # Matching rows become fully opaque; the rest keep their faint default.
    DF.loc[df_mask, "opacity"] = 1.0
    st.write(f"Number of points: {int(df_mask.sum())}")

    color = st.selectbox("Color", ("cluster", "year", "source"))


fig = px.scatter(
    DF,
    x="x",
    y="y",
    color=color,
    width=1000,
    height=800,
    # "opacity" rides along as the last custom-data column so it can be
    # re-attached per trace below; the hover template only reads columns 0-4.
    custom_data=("title", "authors_trimmed", "year", "source", "keywords", "opacity"),
    color_continuous_scale=THEMES[color],
)

# px.scatter's ``opacity`` argument is applied unchanged to every trace.
# Passing the whole column therefore gave each trace the full-length array,
# and when a categorical colour column splits the points into several traces
# the opacities no longer lined up with the points. Give each trace the
# opacities of its own rows instead.
for trace in fig.data:
    trace.marker.opacity = [float(row[-1]) for row in trace.customdata]

fig.update_traces(
    hovertemplate=(
        "<b>%{customdata[0]}</b><br>"
        "%{customdata[1]}<br>"
        "%{customdata[2]}<br>"
        "<i>%{customdata[3]}</i><br>"
        "<i font-size='8'>Keywords: %{customdata[4]}</i>"
    )
)
fig.update_layout(
    # margin=dict(l=10, r=10, t=10, b=10),
    showlegend=False,
    font=dict(
        family="Times New Roman",
        size=30,
    ),
    hoverlabel=dict(
        align="left",
        font_size=14,
        font_family="Rockwell",
        namelength=-1,
    ),
)
# Hide the axis titles.
fig.update_xaxes(title="")
fig.update_yaxes(title="")

st.plotly_chart(fig, use_container_width=True)