kambris committed on
Commit
ecd2a8d
·
verified ·
1 Parent(s): f3a40fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -76,25 +76,26 @@ class SpeechAnalyzer:
76
  self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
77
  self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
78
 
79
- def split_text(self, text, max_length=512):
80
  """Split long text into overlapping segments"""
81
  words = text.split()
82
  segments = []
83
  current_segment = []
84
  current_length = 0
85
-
86
  for word in words:
87
  if current_length + len(word.split()) > max_length:
88
  segments.append(' '.join(current_segment))
 
89
  current_segment = current_segment[-overlap:] + [word]
90
  current_length = len(' '.join(current_segment).split())
91
  else:
92
  current_segment.append(word)
93
  current_length = len(' '.join(current_segment).split())
94
-
95
  if current_segment:
96
  segments.append(' '.join(current_segment))
97
-
98
  return segments
99
 
100
  def analyze_moral_foundations(self, text):
 
76
  self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
77
  self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
78
 
79
def split_text(self, text, max_length=512, overlap=50):
    """Split long text into overlapping segments of whitespace-separated words.

    The text is split on whitespace; each segment is a run of at most
    ``max_length`` consecutive words re-joined with single spaces.
    Consecutive segments share ``overlap`` words.  Lengths are counted in
    words, not in model tokens.

    Args:
        text: The text to split.
        max_length: Maximum number of words per segment (must be >= 1).
        overlap: Number of trailing words of one segment repeated at the
            start of the next.  Values outside ``[0, max_length - 1]`` are
            clamped into that range so every segment respects
            ``max_length`` and each new segment advances through the text.

    Returns:
        A list of segment strings; empty when ``text`` contains no words.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # Clamp instead of trusting the caller: overlap == 0 used to keep the
    # whole previous segment (``lst[-0:]`` is the full list), and
    # overlap >= max_length made segments grow past max_length.
    overlap = max(0, min(overlap, max_length - 1))
    step = max_length - overlap

    words = text.split()
    segments = []
    for start in range(0, len(words), step):
        segments.append(' '.join(words[start:start + max_length]))
        # Stop once this window reaches the end of the text, so no
        # trailing segment consisting only of overlap words is emitted.
        if start + max_length >= len(words):
            break

    return segments
100
 
101
  def analyze_moral_foundations(self, text):