import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import faiss
import torch
from transformers import AutoTokenizer, AutoModel

# Load the embedding model and tokenizer
model_name = "moka-ai/m3e-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
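# (from_pretrained downloads the model weights from the Hugging Face Hub on
# first use and caches them locally, so the first run needs network access.)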

# Some sample text contents to index
texts = ["This is the first document.", "This is the second document.", "And this is the third one.", "Is this the first document?"]

# Convert the text contents to embeddings (mean-pooled over the token dimension)
embeddings = []
for text in texts:
    input_ids = tokenizer.encode(text, return_tensors="pt")
    with torch.no_grad():
        # model(...)[0] is the last hidden state with shape (1, seq_len, hidden);
        # averaging over the tokens yields one fixed-size vector per text
        embedding = model(input_ids)[0].mean(dim=1).squeeze(0).numpy()
    embeddings.append(embedding)
embeddings = np.array(embeddings, dtype="float32")

# Create a Faiss index
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)  # Index that uses inner product (dot product) similarity

# Add the embeddings to the index
index.add(embeddings)
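
# Note: IndexFlatIP ranks by raw inner product, so vector length affects the
# scores. If cosine similarity is wanted instead, one option (a sketch reusing
# the `embeddings` array above) is to L2-normalize the vectors before indexing,
# and to normalize the query embedding the same way before searching:
#
#   faiss.normalize_L2(embeddings)  # in-place, rows scaled to unit length
#   index = faiss.IndexFlatIP(d)
#   index.add(embeddings)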

# Search for similar documents
query = "This is a new document."
input_ids = tokenizer.encode(query, return_tensors="pt")
with torch.no_grad():
    # Same mean pooling as above so the query matches the indexed vectors
    query_embedding = model(input_ids)[0].mean(dim=1).squeeze(0).numpy()
k = 2  # Number of similar documents to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)
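# index.search returns D (similarity scores) and I (row indices into `texts`),
# each with one row per query vector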

# Print the results
st.write(f"Query: {query}")
for i in range(k):
    st.write(f"Rank {i+1}: {texts[I[0][i]]} (similarity score: {D[0][i]})")

    
# Search the index again, this time asking for more neighbours
k = min(5, index.ntotal)  # Number of results to retrieve, capped at the index size
D, I = index.search(query_embedding.reshape(1, -1), k)

# Display the results
st.write("Top {} similar content:".format(k))
for i in range(k):
    st.write("{}: {} : {}".format(i + 1, texts[I[0][i]], I[0][i]))

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")

df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40]
})

st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
        np.random.randn(20, 3),
        columns=['a', 'b', 'c'])

    # Bare variable on its own line: Streamlit's "magic" renders it as a table
    chart_data
    

option = st.selectbox(
    'Which number do you like best?',
     df['first column'])

st.write('You selected: ', option)

text1 = st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')


agree = st.checkbox('I agree')

if agree:
    st.write('Great!')
    
age = st.slider('How old are you?', 0, 130, 25)

st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')

st.write('The current movie title is', title)

number = st.number_input('Insert a number')

st.write('The current number is ', number)
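
# To run this app locally (assuming the script is saved as, e.g., app.py):
#   streamlit run app.py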