Spaces:

broadfield-dev
/

parse_py

Sleeping

broadfield-dev committed on Mar 5

Commit

9e89af0

verified ·

1 Parent(s): 4058ab2

Update process_hf_dataset.py

Files changed (1) hide show

process_hf_dataset.py CHANGED Viewed

@@ -120,13 +120,15 @@ def generate_semantic_vector_og(description, total_lines=100):
     return vector
 def generate_semantic_vector(description, total_lines=100, use_gpu=False):
-    """Generate a 6D semantic vector for a textual description using CodeBERT, projecting to 6D."""
-    # Load CodeBERT model and tokenizer
-    model_name = "microsoft/codebert-base"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
-    model = AutoModel.from_pretrained(model_name).to(device)
     # Tokenize and encode the description
     inputs = tokenizer(description, return_tensors="pt", padding=True, truncation=True, max_length=512)

     return vector
+"""Generate a 6D semantic vector for a textual description using CodeBERT, projecting to 6D."""
+# Load CodeBERT model and tokenizer
+model_name = "microsoft/codebert-base"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
+model = AutoModel.from_pretrained(model_name).to(device)
 def generate_semantic_vector(description, total_lines=100, use_gpu=False):
     # Tokenize and encode the description
     inputs = tokenizer(description, return_tensors="pt", padding=True, truncation=True, max_length=512)