Acorn-Studios committed on
Commit
dc3bdff
·
1 Parent(s): f43a620
Files changed (3) hide show
  1. clean.py +13 -0
  2. descriptions.csv +0 -0
  3. use.py +1 -1
clean.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Clean descriptions_cleaned.csv, keeping only the first column; missing values in that column are replaced with -1.
2
+ import pandas as pd
3
+
4
def clean_descriptions(input_file: str, output_file: str) -> None:
    """Write the first column of a headerless CSV to a new CSV file.

    Malformed rows are skipped while reading, and missing values in the
    first column are replaced with -1. The output has no header row and no
    index column.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
    """
    df = pd.read_csv(input_file, header=None, on_bad_lines="skip")
    # fillna() returns a new frame, so nothing is modified in place on a
    # slice of ``df`` (which can trigger pandas' SettingWithCopyWarning).
    first_column = df.iloc[:, [0]].fillna(-1)
    # ``header`` and ``index`` are boolean switches; pass False explicitly.
    first_column.to_csv(output_file, header=False, index=False)
9
if __name__ == "__main__":
    # Script entry point: reduce descriptions_cleaned.csv to its first
    # column and write the result to descriptions.csv.
    input_file, output_file = "descriptions_cleaned.csv", "descriptions.csv"
    clean_descriptions(input_file, output_file)
    print(f"Cleaned descriptions saved to {output_file}")
descriptions.csv ADDED
The diff for this file is too large to render. See raw diff
 
use.py CHANGED
@@ -6,7 +6,7 @@ class SteamTransformer:
6
  def __init__(self):
7
  self.model = SentenceTransformer("all-MiniLM-L6-v2")
8
  self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
9
- csv_file = "descriptions_cleaned.csv"
10
  with open (csv_file, 'r', encoding='utf-8') as file:
11
  reader = csv.reader(file)
12
  self.labeled = [row for row in reader]
 
6
  def __init__(self):
7
  self.model = SentenceTransformer("all-MiniLM-L6-v2")
8
  self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
9
+ csv_file = "descriptions.csv"
10
  with open (csv_file, 'r', encoding='utf-8') as file:
11
  reader = csv.reader(file)
12
  self.labeled = [row for row in reader]