# Intelligent Product Finder for Amazon — Streamlit app using TF-IDF cosine similarity.
import pandas as pd
import numpy as np
import nltk
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PIL import Image

# Download the NLTK tokenizer data once, best-effort (no-op if already cached;
# the app can still run offline when the data is present locally).
try:
    nltk.download('punkt', quiet=True)
except Exception:
    pass

# Load the product dataset; the 'id' column carries no search signal, so drop it.
data = pd.read_csv('amazon_product.csv')
data = data.drop('id', axis=1)

# Shared English Snowball stemmer used for both products and queries.
stemmer = SnowballStemmer('english')
def tokenize_and_stem(text):
    """Lowercase *text*, tokenize it with NLTK, and return the stemmed tokens."""
    return [stemmer.stem(token) for token in nltk.word_tokenize(text.lower())]
# Precompute stemmed tokens for every product from its title + description,
# so each search only has to stem the query.
data['stemmed_tokens'] = data.apply(
    lambda row: tokenize_and_stem(row['Title'] + ' ' + row['Description']),
    axis=1,
)

# TF-IDF vectorizer sharing the same tokenizer, so query and product text
# are normalized identically.
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenize_and_stem)
def cosine_sim(text1, text2):
    """Return the TF-IDF cosine similarity between two token lists.

    Each argument is a list of tokens; they are joined back into strings
    because TfidfVectorizer expects raw documents (the tokens get
    re-tokenized/re-stemmed by the shared tokenizer).

    NOTE(review): the vectorizer is re-fitted on every call, so scores are
    computed over a two-document vocabulary each time — fine for ranking a
    single query against products one by one, but O(fit) per comparison.
    """
    doc1 = ' '.join(text1)
    doc2 = ' '.join(text2)
    tfidf_matrix = tfidf_vectorizer.fit_transform([doc1, doc2])
    # [0][1] is the off-diagonal entry: similarity of doc1 vs doc2.
    return cosine_similarity(tfidf_matrix)[0][1]
# search function
def search_products(query):
    """Return the 10 products most similar to *query*.

    Writes a 'similarity' column onto the module-level DataFrame as a side
    effect, then returns the top-10 rows (Title, Description, Category)
    sorted by descending similarity.
    """
    query_stemmed = tokenize_and_stem(query)
    if not query_stemmed:
        # Blank/whitespace query: TfidfVectorizer would raise
        # "empty vocabulary" — return an empty result instead.
        return data.head(0)[['Title', 'Description', 'Category']]
    data['similarity'] = data['stemmed_tokens'].apply(
        lambda tokens: cosine_sim(query_stemmed, tokens)
    )
    results = (
        data.sort_values(by=['similarity'], ascending=False)
        .head(10)[['Title', 'Description', 'Category']]
    )
    return results
# --- Streamlit UI ---
img = Image.open('download.png')
st.image(img, width=600)
st.title("Intelligent Product Finder for Amazon")
query = st.text_input("Enter Product Name")
submit = st.button('Search')  # fixed typo: was 'sumbit'
if submit:
    res = search_products(query)
    st.write(res)