etechoptimist committed on
Commit
7e9cd65
·
1 Parent(s): c61c790

Updating to distilbert-base-uncased-finetuned-sst-2-english

Browse files
Files changed (1) hide show
  1. app.py +5 -8
app.py CHANGED
@@ -19,19 +19,16 @@ def anomalies_detector(logs: str) -> list[tuple[int, str]]:
19
  Returns:
20
  list[tuple[int, str]]: List of tuples containing (line_number, anomalous_text)
21
  """
22
- # Initialize the text classification pipeline with a model specialized in log analysis
23
  classifier = pipeline(
24
  "text-classification",
25
- model="microsoft/codebert-base", # Using CodeBERT which is better for technical text
26
  top_k=2 # Get both normal and anomalous probabilities
27
  )
28
- if logs == "":
29
- return []
30
  # Split logs into lines
31
  log_lines = logs.split('\n')
32
  anomalies = []
33
- if len(log_lines) == 0:
34
- return []
35
 
36
  # Process each line
37
  for line_num, line in enumerate(log_lines, 1):
@@ -42,9 +39,9 @@ def anomalies_detector(logs: str) -> list[tuple[int, str]]:
42
  results = classifier(line)
43
 
44
  # Check if the line is classified as anomalous
45
- # CodeBERT returns probabilities for both classes
46
  for result in results:
47
- if result['label'] == 'LABEL_1' and result['score'] > 0.7:
48
  anomalies.append((line_num, line))
49
  break
50
 
 
19
  Returns:
20
  list[tuple[int, str]]: List of tuples containing (line_number, anomalous_text)
21
  """
22
+ # Initialize the text classification pipeline with a model specialized in text classification
23
  classifier = pipeline(
24
  "text-classification",
25
+ model="distilbert-base-uncased-finetuned-sst-2-english", # Using a model fine-tuned for sentiment/classification
26
  top_k=2 # Get both normal and anomalous probabilities
27
  )
28
+
 
29
  # Split logs into lines
30
  log_lines = logs.split('\n')
31
  anomalies = []
 
 
32
 
33
  # Process each line
34
  for line_num, line in enumerate(log_lines, 1):
 
39
  results = classifier(line)
40
 
41
  # Check if the line is classified as anomalous
42
+ # The model returns probabilities for both classes
43
  for result in results:
44
+ if result['label'] == 'NEGATIVE' and result['score'] > 0.7: # NEGATIVE indicates potential anomaly
45
  anomalies.append((line_num, line))
46
  break
47