File size: 1,250 Bytes
146ba08
 
 
1a8373b
146ba08
9ebc8a4
 
29e2c06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Importing required libraries
import pandas as pd
import numpy as np 
import streamlit as st
from sentence_transformers import SentenceTransformer, util

# Page heading rendered by Streamlit.
st.title("Semantic-Search-Transformer")

# Names of the artifacts this script depends on. The two file paths are
# relative, so they are resolved against the current working directory.
ARTICLES_CSV = 'medium_articles.csv'
MODEL_NAME = 'all-MiniLM-L6-v2'
EMBEDDINGS_NPY = 'mediumArticle_embeddings.npy'

# Article table; prediction() reads its "title" column by row position.
df = pd.read_csv(ARTICLES_CSV)

# Sentence-transformer model used by prediction() to embed the query text.
embedder = SentenceTransformer(MODEL_NAME)

# Precomputed corpus embeddings stored as a NumPy array (this is the
# embedding matrix, not a model). prediction() uses each hit's corpus_id as a
# row position in df, so rows here are assumed to be in the same order as the
# CSV rows -- TODO confirm against the script that produced the .npy file.
# NOTE(review): all three loads run every time this script is executed;
# consider Streamlit's caching helpers if that turns out to be slow.
all_embeddings = np.load(EMBEDDINGS_NPY)

# Function

def prediction(query, top_k, corpus_embeddings, df):
    """Render the corpus entries most similar to ``query`` on the page.

    The query is embedded with the module-level ``embedder`` and ranked
    against ``corpus_embeddings`` with ``util.semantic_search``. A heading
    and one line per hit (title plus similarity score) are written with
    ``st.write``.

    Args:
        query: Search text to embed.
        top_k: Number of hits to request from the search.
        corpus_embeddings: Precomputed embeddings to search over. Each hit's
            ``corpus_id`` is used as a row position in ``df``, so the rows
            are assumed to line up with ``df`` -- TODO confirm at the call
            site.
        df: Table with a ``"title"`` column, accessed positionally.

    Returns:
        None. All output goes through ``st.write``.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # Only one query is passed in, so keep the first (and only) hit list.
    hits = util.semantic_search(
        query_embedding, corpus_embeddings, top_k=top_k
    )[0]

    # Write the heading to the app instead of print(): stdout goes to the
    # server console, so the heading never appeared next to the results.
    st.write(f"Top {top_k} most similar sentences in corpus:")
    for hit in hits:
        title = df.iloc[hit['corpus_id']]["title"]
        st.write("-", title, "(Score: {:.4f})".format(hit['score']))

# Fixed demo inputs: the search phrase and the number of matches to list.
# NOTE(review): both values are hard-coded. Console input() is not usable for
# collecting them in a Streamlit page; Streamlit input widgets would be the
# way to make them interactive.
query = 'Artificial Intelligence and Blockchain'
top_k = 10

# Run the search over the precomputed embeddings and render the results.
prediction(query=query, top_k=top_k, corpus_embeddings=all_embeddings, df=df)