File size: 2,797 Bytes
d9ce745
1981c78
 
 
10d9795
d9ce745
22ca17f
15ea0fb
 
 
22ca17f
15ea0fb
 
 
 
22ca17f
15ea0fb
 
22ca17f
 
8275bd6
 
22ca17f
 
 
 
 
 
 
d9ce745
1981c78
 
 
04b8ab3
 
 
10d9795
 
 
232a10d
1981c78
 
 
 
 
 
28e14c5
232a10d
1981c78
232a10d
28e14c5
1981c78
 
 
 
 
 
 
 
 
04b8ab3
1981c78
 
 
 
10296ed
 
ca564a1
10296ed
ca564a1
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import streamlit as st
from topics import TopicModelling
import mdforest
import utils
import os



# Page header: three columns with relative widths 30/5/20. The left column
# holds the introduction, the right column holds the sign-up link and QR code,
# and the middle column is never written to, so it only acts as spacing.
col1, mid, col2 = st.columns([30, 5, 20])

col1.title("Welcome to Embeddr")
for intro_paragraph in (
    "This is a demo of _one of the many_ use cases for an embedding of all your notes. This application lets you find **common ideas** between any two notes.",
    "You can upload two markdown files and the application will find the common ideas between them. It will generate insights based on the common ideas.",
    "**I will be building a better embedding model soon.** Stay tuned for updates. This is just a demo of what is possible with a good embedding model.",
):
    col1.markdown(intro_paragraph)

col2.markdown("### [Sign up for updates](https://embeddr.my.canva.site/)")
col2.image("media/qrcode.png")

# Horizontal rule followed by the heading of the upload section.
st.markdown("---")
st.markdown("## Drop in two documents and get insights between them.")

# Both uploaders accept the same extensions; the distinct widget keys
# ("first" / "second") keep the two otherwise identical uploaders apart.
accepted_extensions = ["md", "txt"]

# Two equally weighted upload columns with an unused spacer column between.
col3, mid2, col4 = st.columns([40, 5, 40])

col3.markdown("### Drop the first document")
file1 = col3.file_uploader("Upload a file", type=accepted_extensions, key="first")

col4.markdown("### Drop the second document")
file2 = col4.file_uploader("Upload a file", type=accepted_extensions, key="second")

# Containers populated further down once both documents have been uploaded:
# `topics` receives one entry per document, `results` is rebound to whatever
# utils.generate_insights returns.
topics, results = {}, {}

# Objects handed to utils.cluster_based_on_topics below.
# NOTE(review): presumably an embedding model and an NLP pipeline; the loaders
# live in utils and are not visible here -- confirm there.
embedder = utils.load_model()
nlp = utils.load_nlp()

# Make sure the ./prompter/ directory exists before the pipeline runs.
# A single makedirs(exist_ok=True) call replaces the exists()-then-mkdir()
# pair: with the two-step form, two sessions starting at the same moment can
# both see the directory as missing and the slower one then fails in mkdir().
# NOTE(review): nothing in this script reads or writes ./prompter/ directly;
# presumably the utils helpers do -- confirm there.
os.makedirs("./prompter/", exist_ok=True)

def _read_upload_as_text(uploaded_file):
    """Return the complete contents of an uploaded file as text.

    ``getvalue()`` is used instead of ``read()`` so the result does not depend
    on the buffer's current read position. The ``utf-8-sig`` codec decodes
    plain UTF-8 unchanged and drops a leading byte-order mark when one is
    present, so the mark does not leak into the text that gets analysed.

    Returns:
        The decoded text, or ``None`` when the bytes are not valid UTF-8.
    """
    try:
        return uploaded_file.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError:
        return None


# The pipeline only runs once both uploaders hold a file.
if file1 is not None and file2 is not None:

    input_text1 = _read_upload_as_text(file1)
    input_text2 = _read_upload_as_text(file2)

    # Report every unreadable upload, then halt this script run instead of
    # surfacing a UnicodeDecodeError traceback to the user.
    for uploaded, decoded in ((file1, input_text1), (file2, input_text2)):
        if decoded is None:
            st.error(f"Could not read '{uploaded.name}': the file is not valid UTF-8 text.")
    if input_text1 is None or input_text2 is None:
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    # Step 1: per-document topic extraction. Each entry in `topics` is a
    # two-item list [keywords, concepts] as returned by generate_topics().
    with st.spinner('Generating insights...'):

        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]
        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]

    # Step 2: cluster the two cleaned texts together (3 clusters requested).
    with st.spinner("Flux capacitor is fluxing..."):
        clustered = utils.cluster_based_on_topics(nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3)

    # Step 3: combine topics, file names, texts and clusters into the results.
    with st.spinner("Polishing up"):
        results = utils.generate_insights(topics, file1.name, file2.name, cleaned_text1, cleaned_text2, clustered)
        st.success("Done!")

    st.title("Insights generated")
    st.markdown("### The following insights are common to both documents.")
    # Each result is read through its "name", "description" and "concepts"
    # keys; one collapsible expander is rendered per result.
    for result in results:
        with st.expander(result["name"]):
            st.write(result["description"])
            st.markdown("Related Concepts:")
            for insight in result["concepts"]:
                st.markdown(f" - {insight}")