import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import faiss
import torch
from transformers import AutoTokenizer, AutoModel

# Load the embedding model and tokenizer
model_name = "moka-ai/m3e-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
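# (from_pretrained downloads the model weights from the Hugging Face Hub on
# first use and caches them locally, so the first run needs network access.)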

# Some sample text contents to index
texts = ["This is the first document.", "This is the second document.", "And this is the third one.", "Is this the first document?"]

# Convert the text contents to embeddings (mean-pooled over the token dimension)
embeddings = []
for text in texts:
    input_ids = tokenizer.encode(text, return_tensors="pt")
    with torch.no_grad():
        # model(...)[0] is the last hidden state with shape (1, seq_len, hidden);
        # averaging over the tokens yields one fixed-size vector per text
        embedding = model(input_ids)[0].mean(dim=1).squeeze(0).numpy()
    embeddings.append(embedding)
embeddings = np.array(embeddings, dtype="float32")

# Create a Faiss index
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)  # Index that uses inner product (dot product) similarity

# Add the embeddings to the index
index.add(embeddings)
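
# Note: IndexFlatIP ranks by raw inner product, so vector length affects the
# scores. If cosine similarity is wanted instead, one option (a sketch reusing
# the `embeddings` array above) is to L2-normalize the vectors before indexing,
# and to normalize the query embedding the same way before searching:
#
#   faiss.normalize_L2(embeddings)  # in-place, rows scaled to unit length
#   index = faiss.IndexFlatIP(d)
#   index.add(embeddings)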

# Search for similar documents
query = "This is a new document."
input_ids = tokenizer.encode(query, return_tensors="pt")
with torch.no_grad():
    # Same mean pooling as above so the query matches the indexed vectors
    query_embedding = model(input_ids)[0].mean(dim=1).squeeze(0).numpy()
k = 2  # Number of similar documents to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)
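# index.search returns D (similarity scores) and I (row indices into `texts`),
# each with one row per query vector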

# Print the results
st.write(f"Query: {query}")
for i in range(k):
    st.write(f"Rank {i+1}: {texts[I[0][i]]} (similarity score: {D[0][i]})")

    
# Search the index again, this time asking for more neighbours
k = min(5, index.ntotal)  # Number of results to retrieve, capped at the index size
D, I = index.search(query_embedding.reshape(1, -1), k)

# Display the results
st.write("Top {} similar content:".format(k))
for i in range(k):
    st.write("{}: {} : {}".format(i + 1, texts[I[0][i]], I[0][i]))

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")

df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40]
})

st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
        np.random.randn(20, 3),
        columns=['a', 'b', 'c'])

    # Bare variable on its own line: Streamlit's "magic" renders it as a table
    chart_data
    

option = st.selectbox(
    'Which number do you like best?',
     df['first column'])

st.write('You selected: ', option)

text1 = st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')


agree = st.checkbox('I agree')

if agree:
    st.write('Great!')
    
age = st.slider('How old are you?', 0, 130, 25)

st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')

st.write('The current movie title is', title)

number = st.number_input('Insert a number')

st.write('The current number is ', number)
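
# To run this app locally (assuming the script is saved as, e.g., app.py):
#   streamlit run app.py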