File size: 2,965 Bytes
28be794
 
3454357
4b949d0
72c7204
 
3454357
f4547e9
 
72c7204
 
f4547e9
 
 
 
 
 
72c7204
f4547e9
72c7204
f4547e9
 
72c7204
28be794
 
 
3454357
2733741
 
 
 
3454357
 
 
 
 
 
 
 
 
 
 
35b1251
 
 
 
 
 
 
 
 
 
28be794
a203666
35b1251
 
 
28be794
35b1251
 
 
28be794
35b1251
 
 
 
 
 
 
28be794
35b1251
 
28be794
35b1251
28be794
35b1251
 
28be794
 
35b1251
 
28be794
35b1251
 
 
 
 
3454357
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
import pandas as pd
import altair as alt
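# Dependency: install with `pip install -U sentence-transformers` (e.g. via requirements.txt); `!pip` shell escapes are only valid in notebooks, not in a .py file.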
from sentence_transformers import SentenceTransformer, util
from sentence_transformers import SentenceTransformer
import numpy as np
import sys
import json
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, LoggingHandler, util, models, evaluation, losses, InputExample
import logging
from datetime import datetime
import gzip
import os
import tarfile
from collections import defaultdict
from torch.utils.data import IterableDataset
import tqdm
from torch.utils.data import Dataset
import random
from shutil import copyfile
from urllib.error import URLError

# --- Page header: title and short description --------------------------------
st.title(" Your Top 3 Important Sessions")
st.markdown(
    "This application is a dashboard for displaying your top 3 Sessions at the summit"
)

# --- Data ---------------------------------------------------------------------
# Document embeddings stored as plain text, read into a float array.
# NOTE(review): presumably one row per row of sessions.csv, in the same
# order -- main() pairs them positionally; confirm against the file producer.
doc_emb = np.loadtxt("abstract-embed.txt", dtype=float)

# Session metadata; only the columns listed here are read from the CSV.
_SESSION_COLUMNS = [
    'Unique ID',
    'Name',
    'Description',
    'Activity Code',
    'Start Time',
    'End Time',
    'Location Name',
]
df = pd.read_csv("sessions.csv", usecols=_SESSION_COLUMNS)

# --- Front-end banner ---------------------------------------------------------
# Raw HTML snippet that main() passes to st.markdown with unsafe_allow_html.
html_temp = """
<div style ="background-color:lightblue;padding:13px">
<h1 style ="color:white;text-align:center;">Sentence Similarity App Nashville Analytic Summit</h1>
</div>
        """
def main():
    """Render the search page and list the sessions most similar to the query.

    Reads three module-level objects: ``html_temp`` (HTML banner), ``df``
    (session table) and ``doc_emb`` (document embedding array). The banner
    and a text input are always shown. Once the user has entered a non-empty
    query, the query is encoded, scored against every document embedding
    with a dot product, and the three highest-scoring sessions are written
    to the page with their score, title, abstract, location and time span.

    Returns:
        None. All output goes to the Streamlit page.
    """
    # Display the front-end banner.
    st.markdown(html_temp, unsafe_allow_html=True)

    query = st.text_input("Enter your query: ")
    if not query:
        # Nothing to rank yet; also avoids constructing the model needlessly.
        return

    # NOTE(review): the model is constructed on every call that has a query.
    # If the deployed Streamlit version offers a resource cache
    # (e.g. st.cache_resource), wrapping this would avoid repeated loads --
    # confirm the version before changing.
    model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

    # Encode the query and compute its dot score against all document
    # embeddings; [0] selects the score row belonging to the single query.
    query_emb = model.encode(query).astype(float)
    scores = util.dot_score(query_emb, doc_emb.astype(float))[0].cpu().tolist()

    # Pair every score with the session attributes it belongs to. Pairing is
    # positional, and zip() stops at the shortest input.
    doc_score_pairs = zip(
        df["Description"],
        scores,
        df["Name"],
        df["Start Time"],
        df["End Time"],
        df["Location Name"],
    )

    # Number of results to return.
    top_k = 3

    # Shown on the page (previously print(), which only reached the server
    # console and was invisible to the user).
    st.write(f"Your top {top_k} most similar sessions in the Summit:")

    # Sort by decreasing score and keep the best top_k.
    best = sorted(doc_score_pairs, key=lambda pair: pair[1], reverse=True)[:top_k]

    # Output the recommended sessions.
    for doc, score, title, start_time, end_time, location in best:
        st.write(f"Score: {score:f}")
        st.write(f"Title: {title}")
        st.write(f"Abstract: {doc}")
        st.write(f"Location: {location}")
        st.write(f"From {start_time} to {end_time}")
        st.write('\n')

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()