Update app.py
Browse files
app.py
CHANGED
@@ -76,25 +76,26 @@ class SpeechAnalyzer:
|
|
76 |
self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
|
77 |
self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
78 |
|
79 |
-
def split_text(self, text, max_length=512, overlap=50):
    """Split long text into overlapping, word-based segments.

    Args:
        text: Input string; it is tokenised on whitespace.
        max_length: Maximum number of words allowed in one segment.
            Must be at least 1.
        overlap: Number of trailing words of a finished segment that are
            repeated at the start of the next one. Values outside
            ``[0, max_length - 1]`` are clamped into that range so every
            new segment still makes forward progress.

    Returns:
        A list of segment strings (words joined by single spaces). An
        empty or whitespace-only ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # `overlap` used to be an undefined name here; it is now a parameter.
    # Clamp it: an overlap >= max_length would carry the whole segment
    # forward and let segments grow without bound.
    overlap = max(0, min(overlap, max_length - 1))

    words = text.split()
    segments = []
    current_segment = []

    for word in words:
        # Each item of `words` is exactly one word, so the segment's
        # length is simply len(current_segment); no re-join/re-split.
        if len(current_segment) + 1 > max_length:
            segments.append(' '.join(current_segment))
            # Guard overlap == 0 explicitly: lst[-0:] is the WHOLE list.
            carried = current_segment[-overlap:] if overlap else []
            current_segment = carried + [word]
        else:
            current_segment.append(word)

    if current_segment:
        segments.append(' '.join(current_segment))

    return segments
|
99 |
|
100 |
def analyze_moral_foundations(self, text):
|
|
|
76 |
self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
|
77 |
self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
78 |
|
79 |
+
def split_text(self, text, max_length=512, overlap=50):
    """Split long text into overlapping, word-based segments.

    Args:
        text: Input string; it is tokenised on whitespace.
        max_length: Maximum number of words allowed in one segment.
            Must be at least 1.
        overlap: Number of trailing words of a finished segment that are
            repeated at the start of the next one. Values outside
            ``[0, max_length - 1]`` are clamped into that range so every
            new segment still makes forward progress.

    Returns:
        A list of segment strings (words joined by single spaces). An
        empty or whitespace-only ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # An overlap >= max_length would carry the entire segment forward,
    # so each following segment would exceed max_length and keep growing.
    overlap = max(0, min(overlap, max_length - 1))

    segments = []
    current_segment = []

    for word in text.split():
        # Every item is a single word, so the running length is just
        # len(current_segment); re-joining and re-splitting per word
        # made the loop quadratic.
        if len(current_segment) < max_length:
            current_segment.append(word)
            continue

        segments.append(' '.join(current_segment))
        # lst[-0:] returns the whole list, so overlap == 0 needs its
        # own branch to really mean "no overlap".
        tail = current_segment[-overlap:] if overlap else []
        current_segment = tail + [word]

    if current_segment:
        segments.append(' '.join(current_segment))

    return segments
|
100 |
|
101 |
def analyze_moral_foundations(self, text):
|