Spaces:
Sleeping
Sleeping
import streamlit as st | |
# To make things easier later, we're also importing numpy and pandas for | |
# working with sample data. | |
import numpy as np | |
import pandas as pd | |
import torch | |
import faiss | |
import numpy as np | |
from transformers import AutoTokenizer, AutoModel | |
# Load the embedding model and tokenizer.
# NOTE: Streamlit re-executes this script on every interaction, so these two
# loads run again on each rerun; consider wrapping them in Streamlit's
# resource cache if start-up time becomes a problem.
model_name = "moka-ai/m3e-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)


def _embed(sentences):
    """Return one unit-length sentence vector per input string.

    The model's first output holds one hidden state per *token*, so the
    token axis is collapsed with an attention-mask-weighted mean to obtain a
    fixed-size vector regardless of sentence length.

    Args:
        sentences: Sequence of strings to encode.

    Returns:
        A C-contiguous float32 array of shape (len(sentences), hidden_size),
        which is the layout FAISS expects for ``add`` and ``search``.
    """
    batch = tokenizer(
        list(sentences),
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        token_states = model(**batch)[0]  # (batch, seq_len, hidden)
        # Padding positions must not contribute to the mean.
        mask = batch["attention_mask"].unsqueeze(-1).to(token_states.dtype)
        pooled = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        # With unit-length vectors the inner product used by IndexFlatIP
        # equals cosine similarity, so scores are comparable across documents.
        pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
    return np.ascontiguousarray(pooled.cpu().numpy(), dtype=np.float32)


# Sample documents to index.
texts = [
    "This is the first document.",
    "This is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Convert the text contents to embeddings: one row per document.
embeddings = _embed(texts)

# Create a Faiss index.
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)  # Exact inner-product search (cosine, given unit vectors)

# Add the embeddings to the index.
index.add(embeddings)

# Search for similar documents.
query = "This is a new document."
query_embedding = _embed([query])[0]

# Never ask for more neighbours than there are indexed vectors: FAISS pads
# missing results with id -1, which would silently index texts[-1].
k = min(2, index.ntotal)  # Number of similar documents to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)

# Print the results.
st.write(f"Query: {query}")
for rank, (doc_id, score) in enumerate(zip(I[0], D[0]), start=1):
    st.write(f"Rank {rank}: {texts[doc_id]} (similarity score: {score})")

# Search the index again for a longer list of the most similar content.
# The same query vector and document list are reused here.
k = min(5, index.ntotal)  # Number of results to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)

# Display the results.
st.write("Top {} similar content:".format(k))
for rank, doc_id in enumerate(I[0], start=1):
    st.write("{}: {} : {}".format(rank, texts[doc_id], doc_id))
# Page heading followed by a small static table.
st.title('My first app')
st.write("Here's our first attempt at using data to create a table:")

# Column name -> values; dict order fixes the column order of the frame.
table_columns = {
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40],
}
df = pd.DataFrame(table_columns)
st.write(df)
# Optional random table, shown only while the checkbox is ticked.
show_frame = st.checkbox('Show dataframe')
if show_frame:
    samples = np.random.randn(20, 3)  # 20 rows x 3 columns of standard-normal draws
    chart_data = pd.DataFrame(samples, columns=['a', 'b', 'c'])

    # Bare expression on purpose: this relies on Streamlit "magic" to render it.
    chart_data

# Drop-down populated from the first column of the table built earlier.
option = st.selectbox(
    label='Which number do you like best?',
    options=df['first column'],
)
st.write('You selected: ', option)
# Fixed-width text element; the returned handle is kept in `text1`.
text1 = st.text('This is some text.')

# The button reads as pressed only on the rerun triggered by the click.
hello_clicked = st.button('Say hello')
st.write('Why hello there' if hello_clicked else 'Goodbye')

# Confirmation message appears only while the box is ticked.
agree = st.checkbox('I agree')
if agree:
    st.write('Great!')
# Integer slider: range 0..130, initial position 25.
age = st.slider('How old are you?', min_value=0, max_value=130, value=25)
st.write("I'm ", age, 'years old')

# Free-text input pre-filled with a default title.
title = st.text_input('Movie title', value='Life of Brian')
st.write('The current movie title is', title)

# Numeric input using the widget's default settings.
number = st.number_input('Insert a number')
st.write('The current number is ', number)