Spaces:

Bigshot
/

Semantic-Steam-Search

Sleeping

Acorn-Studios committed on Jun 4

Commit

dc3bdff

1 Parent(s): f43a620

fixes

Files changed (3) hide show

clean.py ADDED Viewed

+# Clean descriptions_cleaned.csv so that only the first column is kept; any missing value is replaced with -1.
+import pandas as pd
+def clean_descriptions(input_file, output_file):
+    df = pd.read_csv(input_file, header=None, on_bad_lines='skip')
+    df_cleaned = df.iloc[:, [0]]
+    df_cleaned.fillna(-1, inplace=True)
+    df_cleaned.to_csv(output_file, header=None, index=None)
+if __name__ == "__main__":
+    input_file = "descriptions_cleaned.csv"
+    output_file = "descriptions.csv"
+    clean_descriptions(input_file, output_file)
+    print(f"Cleaned descriptions saved to {output_file}")

descriptions.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

use.py CHANGED Viewed

@@ -6,7 +6,7 @@ class SteamTransformer:
     def __init__(self):
         self.model = SentenceTransformer("all-MiniLM-L6-v2")
         self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
-        csv_file = "descriptions_cleaned.csv"
         with open (csv_file, 'r', encoding='utf-8') as file:
             reader = csv.reader(file)
             self.labeled = [row for row in reader]

     def __init__(self):
         self.model = SentenceTransformer("all-MiniLM-L6-v2")
         self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
+        csv_file = "descriptions.csv"
         with open (csv_file, 'r', encoding='utf-8') as file:
             reader = csv.reader(file)
             self.labeled = [row for row in reader]