Spaces:
Runtime error
Runtime error
Update util/preprocessing.py
Browse files- util/preprocessing.py +48 -0
util/preprocessing.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import difflib
|
2 |
import pandas as pd
|
|
|
|
|
3 |
|
4 |
def word_to_market_name(word):
|
5 |
markets_df = pd.read_csv('data/market_name_utf8.csv')
|
@@ -28,6 +30,37 @@ def word_to_market_name(word):
|
|
28 |
return output
|
29 |
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
def check_word(word):
|
33 |
markets_df = pd.read_csv('data/market_name_utf8.csv')
|
@@ -38,4 +71,19 @@ def check_word(word):
|
|
38 |
print(f"check_word, {word}")
|
39 |
return True
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
return False
|
|
|
1 |
import difflib
|
2 |
import pandas as pd
|
3 |
+
from util.search_data import *
|
4 |
+
|
5 |
|
6 |
def word_to_market_name(word):
|
7 |
markets_df = pd.read_csv('data/market_name_utf8.csv')
|
|
|
30 |
return output
|
31 |
|
32 |
|
33 |
+
def word_to_product_name(word):
    """Suggest product names for ``word``.

    Product names are read from ``data/products.txt`` when that file exists;
    otherwise they come from ``get_all_product_names()``.

    Args:
        word: The text to look up.

    Returns:
        A list of product names. First come, in source order, all products
        for which ``word`` and the product name agree on their common leading
        characters (i.e. one is a prefix of the other). They are followed by
        up to three of the remaining products with the highest
        ``difflib.SequenceMatcher`` ratio against ``word``, best first.
    """
    # Imported locally so the function does not rely on ``os`` leaking in
    # through the module's ``from util.search_data import *``.
    import os

    if not os.path.exists("data/products.txt"):
        products = get_all_product_names()
    else:
        with open("data/products.txt", "r", encoding="utf-8") as f:
            # NOTE(review): the last two newline-separated entries are
            # discarded; presumably the file ends with two trailing
            # separators -- TODO confirm against whatever writes the file.
            products = f.read().split("\n")[:-2]

    output = []
    scores = {}

    for p in products:
        # Same test as comparing the first min(len(word), len(p)) characters:
        # a product shorter than ``word`` still matches if it is a prefix of it.
        if p.startswith(word) or word.startswith(p):
            output.append(p)
        else:
            scores[p] = difflib.SequenceMatcher(None, word, p).ratio()

    # sorted() is stable, so equally-scored products keep their source order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    output.extend(name for name, _ in ranked[:3])

    return output
|
62 |
+
|
63 |
+
|
64 |
|
65 |
def check_word(word):
|
66 |
markets_df = pd.read_csv('data/market_name_utf8.csv')
|
|
|
71 |
print(f"check_word, {word}")
|
72 |
return True
|
73 |
|
74 |
+
return False
|
75 |
+
|
76 |
+
def check_product(word):
    """Report whether ``word`` exactly equals a known product name.

    Product names are read from ``data/products.txt`` when that file exists;
    otherwise they come from ``get_all_product_names()``.

    Args:
        word: The text to test.

    Returns:
        True if ``word`` equals one of the product names (a line is printed
        in that case), False otherwise.
    """
    # Imported locally so the function does not rely on ``os`` leaking in
    # through the module's ``from util.search_data import *``.
    import os

    if not os.path.exists("data/products.txt"):
        products = get_all_product_names()
    else:
        with open("data/products.txt", "r", encoding="utf-8") as f:
            # NOTE(review): the last two newline-separated entries are
            # discarded; presumably the file ends with two trailing
            # separators -- TODO confirm against whatever writes the file.
            products = f.read().split("\n")[:-2]

    if word in products:
        # The label reads "check_word"; the emitted text is kept unchanged.
        print(f"check_word, {word}")
        return True

    return False
|