Spaces:

ldhldh
/

seoul_backend

Runtime error

App Files Files Community

ldhldh committed on May 3, 2024

Commit

7e589bc

verified ·

1 Parent(s): 7178c35

Update util/preprocessing.py

Browse files

Files changed (1) hide show

util/preprocessing.py +48 -0

util/preprocessing.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import difflib
 import pandas as pd
 def word_to_market_name(word):
     markets_df = pd.read_csv('data/market_name_utf8.csv')
@@ -28,6 +30,37 @@ def word_to_market_name(word):
     return output
 def check_word(word):
     markets_df = pd.read_csv('data/market_name_utf8.csv')
@@ -38,4 +71,19 @@ def check_word(word):
             print(f"check_word, {word}")
             return True
     return False

 import difflib
 import pandas as pd
+from util.search_data import *
 def word_to_market_name(word):
     markets_df = pd.read_csv('data/market_name_utf8.csv')
     return output
def word_to_product_name(word):
    """Suggest product names for a partial or misspelled query.

    Product names are read from ``data/products.txt`` (UTF-8, one name per
    line). When that file does not exist they are obtained from
    ``get_all_product_names()`` instead.

    Args:
        word: Query string to compare against every product name.

    Returns:
        A list of product names. First come all products that agree with
        ``word`` on their common leading characters (so a product that is a
        prefix of ``word`` matches as well as one that ``word`` is a prefix
        of), in catalogue order. They are followed by up to three of the
        remaining products with the highest ``difflib`` similarity ratio to
        ``word``, best first. An empty ``word`` therefore returns every
        product.
    """
    # Imported locally so this function does not rely on ``os`` leaking in
    # through the module's wildcard import.
    import os

    if not os.path.exists("data/products.txt"):
        products = get_all_product_names()
    else:
        with open("data/products.txt", "r", encoding="utf-8") as f:
            # NOTE(review): the last two entries are dropped, which presumes
            # the file ends with two trailing non-product lines -- confirm
            # against whatever writes this file.
            products = f.read().split("\n")[:-2]

    output = []
    scores = {}
    for p in products:
        # Compare only the characters both strings have; this is the same
        # test as walking the two strings index by index.
        overlap = min(len(word), len(p))
        if p[:overlap] == word[:overlap]:
            output.append(p)
        else:
            scores[p] = difflib.SequenceMatcher(None, word, p).ratio()

    # The sort is stable, so products with equal scores keep catalogue order.
    output.extend(sorted(scores, key=scores.get, reverse=True)[:3])
    return output
 def check_word(word):
     markets_df = pd.read_csv('data/market_name_utf8.csv')
             print(f"check_word, {word}")
             return True
+    return False
def check_product(word):
    """Report whether ``word`` is exactly one of the known product names.

    Product names are read from ``data/products.txt`` (UTF-8, one name per
    line). When that file does not exist they are obtained from
    ``get_all_product_names()`` instead.

    Args:
        word: Candidate product name.

    Returns:
        ``True`` if ``word`` equals a product name (a line is also printed
        to stdout in that case), otherwise ``False``.
    """
    # Imported locally so this function does not rely on ``os`` leaking in
    # through the module's wildcard import.
    import os

    if not os.path.exists("data/products.txt"):
        products = get_all_product_names()
    else:
        with open("data/products.txt", "r", encoding="utf-8") as f:
            # NOTE(review): the last two entries are dropped, which presumes
            # the file ends with two trailing non-product lines -- confirm
            # against whatever writes this file.
            products = f.read().split("\n")[:-2]

    if any(word == p for p in products):
        # Label the output with this function's own name; it previously
        # carried the label of the sibling market-name check.
        print(f"check_product, {word}")
        return True
    return False