Spaces:
Sleeping
Sleeping
Acorn-Studios
committed on
Commit
·
dc3bdff
1
Parent(s):
f43a620
fixes
Browse files- clean.py +13 -0
- descriptions.csv +0 -0
- use.py +1 -1
clean.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Clean descriptions_cleaned.csv, keeping only the first column. Missing values in that column are replaced with -1.
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def clean_descriptions(input_file, output_file):
    """Write the first column of a header-less CSV to a new CSV file.

    The input is read without a header row, and lines that pandas cannot
    parse are skipped (``on_bad_lines='skip'``). Only the first column is
    kept; missing values in it are replaced with ``-1``. The result is
    written without a header row and without the index column.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
    """
    df = pd.read_csv(input_file, header=None, on_bad_lines='skip')
    # Fill on the returned frame rather than in place: an in-place fillna on
    # an ``iloc`` selection of ``df`` is the chained-assignment pattern that
    # triggers pandas' SettingWithCopyWarning.
    first_column = df.iloc[:, [0]].fillna(-1)
    # ``header`` and ``index`` are boolean switches; pass False explicitly
    # instead of relying on None being falsy.
    first_column.to_csv(output_file, header=False, index=False)
|
9 |
+
if __name__ == "__main__":
    # Script entry point: reduce the cleaned descriptions file to its first
    # column and report where the result was written.
    input_file, output_file = "descriptions_cleaned.csv", "descriptions.csv"
    clean_descriptions(input_file, output_file)
    print(f"Cleaned descriptions saved to {output_file}")
|
descriptions.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
use.py
CHANGED
@@ -6,7 +6,7 @@ class SteamTransformer:
|
|
6 |
def __init__(self):
|
7 |
self.model = SentenceTransformer("all-MiniLM-L6-v2")
|
8 |
self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
|
9 |
-
csv_file = "
|
10 |
with open (csv_file, 'r', encoding='utf-8') as file:
|
11 |
reader = csv.reader(file)
|
12 |
self.labeled = [row for row in reader]
|
|
|
6 |
def __init__(self):
|
7 |
self.model = SentenceTransformer("all-MiniLM-L6-v2")
|
8 |
self.embeddings = pkl.load(open("embeddings.pkl", "rb"))
|
9 |
+
csv_file = "descriptions.csv"
|
10 |
with open (csv_file, 'r', encoding='utf-8') as file:
|
11 |
reader = csv.reader(file)
|
12 |
self.labeled = [row for row in reader]
|