Spaces:
Running
Running
Update process_hf_dataset.py
Browse files
- process_hf_dataset.py +8 -6
process_hf_dataset.py
CHANGED
@@ -120,13 +120,15 @@ def generate_semantic_vector_og(description, total_lines=100):
|
|
120 |
|
121 |
return vector
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
def generate_semantic_vector(description, total_lines=100, use_gpu=False):
|
124 |
-
|
125 |
-
# Load CodeBERT model and tokenizer
|
126 |
-
model_name = "microsoft/codebert-base"
|
127 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
128 |
-
device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
|
129 |
-
model = AutoModel.from_pretrained(model_name).to(device)
|
130 |
|
131 |
# Tokenize and encode the description
|
132 |
inputs = tokenizer(description, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
|
|
120 |
|
121 |
return vector
|
122 |
|
123 |
+
|
124 |
+
"""Generate a 6D semantic vector for a textual description using CodeBERT, projecting to 6D."""
|
125 |
+
# Load CodeBERT model and tokenizer
|
126 |
+
model_name = "microsoft/codebert-base"
|
127 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
128 |
+
device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
|
129 |
+
model = AutoModel.from_pretrained(model_name).to(device)
|
130 |
def generate_semantic_vector(description, total_lines=100, use_gpu=False):
|
131 |
+
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
# Tokenize and encode the description
|
134 |
inputs = tokenizer(description, return_tensors="pt", padding=True, truncation=True, max_length=512)
|