Spaces:

etechoptimist
/

software_anomalies

Running

etechoptimist committed on Jun 5

Commit

7e9cd65

1 Parent(s): c61c790

Updating to distilbert-base-uncased-finetuned-sst-2-english

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,19 +19,16 @@ def anomalies_detector(logs: str) -> list[tuple[int, str]]:
     Returns:
         list[tuple[int, str]]: List of tuples containing (line_number, anomalous_text)
     """
-    # Initialize the text classification pipeline with a model specialized in log analysis
     classifier = pipeline(
         "text-classification",
-        model="microsoft/codebert-base",  # Using CodeBERT which is better for technical text
         top_k=2  # Get both normal and anomalous probabilities
     )
-    if logs == "":
-        return []
     # Split logs into lines
     log_lines = logs.split('\n')
     anomalies = []
-    if len(log_lines) == 0:
-        return []
     # Process each line
     for line_num, line in enumerate(log_lines, 1):
@@ -42,9 +39,9 @@ def anomalies_detector(logs: str) -> list[tuple[int, str]]:
         results = classifier(line)
         # Check if the line is classified as anomalous
-        # CodeBERT returns probabilities for both classes
         for result in results:
-            if result['label'] == 'LABEL_1' and result['score'] > 0.7:
                 anomalies.append((line_num, line))
                 break

     Returns:
         list[tuple[int, str]]: List of tuples containing (line_number, anomalous_text)
     """
+    # Initialize the text classification pipeline with a model specialized in text classification
     classifier = pipeline(
         "text-classification",
+        model="distilbert-base-uncased-finetuned-sst-2-english",  # Using a model fine-tuned for sentiment/classification
         top_k=2  # Get both normal and anomalous probabilities
     )
     # Split logs into lines
     log_lines = logs.split('\n')
     anomalies = []
     # Process each line
     for line_num, line in enumerate(log_lines, 1):
         results = classifier(line)
         # Check if the line is classified as anomalous
+        # The model returns probabilities for both classes
         for result in results:
+            if result['label'] == 'NEGATIVE' and result['score'] > 0.7:  # NEGATIVE indicates potential anomaly
                 anomalies.append((line_num, line))
                 break