import pandas as pd
from sentence_transformers import SentenceTransformer, util
from flask import Flask, render_template, request, jsonify
from nltk.corpus import stopwords
import os
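# NOTE: assumes the NLTK stopwords corpus has already been downloaded (nltk.download("stopwords")).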
stop = stopwords.words('english')


def text_preprocessing(text):
    # lowercase the whole query
    text = text.lower()
    # optional cleanup (kept disabled, as in the original): strip URLs and punctuation, e.g.
    # text = re.sub(r'http\S+|www\.\S+|[@%:,]', '', text)
    # split the sentence into words and drop English stopwords
    word_tokens = text.split(' ')
    keywords = [item for item in word_tokens if item not in stop]
    # reassemble the remaining words into a single string
    return ' '.join(keywords)

def concat_content(title, value):
    return f"{title}: {value}"

def df_to_text(df):
    text = []
    titles = ["Product ID", "Product Name", "Brand", "Gender", "Price (INR)", "Description", "Primary Color"]
    cols = ["ProductID", "ProductName", "ProductBrand", "Gender", "Price (INR)", "Description", "PrimaryColor"]
    # one "Title: value" line per field, for each matched product row
    for _, row in df.iterrows():
        for title, col in zip(titles, cols):
            text.append(concat_content(title, row[col]))
        text.append('')  # blank line between products
    return '\n'.join(text)


df = pd.read_csv("data/dataset.csv").reset_index(drop=True)
# precomputed sentence embeddings, one row per product in dataset.csv
embedding_df = pd.read_csv("data/embedding.csv", header=None)
docs = embedding_df.values
HF_TOKEN = os.environ.get("HF_TOKEN")

model = SentenceTransformer("bert-base-nli-mean-tokens", cache_folder="/code/", use_auth_token=HF_TOKEN)
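# A minimal sketch (an assumption, not part of this app's startup path) of how
# data/embedding.csv could be precomputed with the same model, so the cosine-similarity
# search below compares vectors from the same embedding space:
#
#   corpus = (df["ProductName"].fillna("") + " " + df["Description"].fillna("")).map(text_preprocessing)
#   corpus_embeddings = model.encode(corpus.tolist())
#   pd.DataFrame(corpus_embeddings).to_csv("data/embedding.csv", header=False, index=False)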


app = Flask(__name__)


@app.route("/")
def index():
    return render_template("chat.html")


@app.route("/chat", methods=["POST"])
def chat():
    data = request.get_json()
    msg = data.get("msg")
    try:
        output_df = get_chat_response(msg)
        output_text = df_to_text(output_df)
        return jsonify({"response": True, "message": output_text})
    except Exception as e:
        print(e)
        error_message = f'Error: {str(e)}'
        return jsonify({"message": error_message, "response": False})


def get_chat_response(text):
    # embed the (preprocessed) query with the same model used for the corpus embeddings
    query_vector = model.encode(text_preprocessing(text)).astype(float)
    # cosine similarity between the query and every precomputed product embedding
    results = util.pytorch_cos_sim(query_vector, docs)
    # indices of the top-3 most similar products
    top_n = 3
    sort_idx = results[0].argsort(descending=True)[:top_n].tolist()
    return df.iloc[sort_idx]


if __name__ == "__main__":
    app.run(debug=True)