Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -200,6 +200,91 @@ def call_model(model_prompt):
|
|
200 |
return completion.choices[0].message.content.strip()
|
201 |
|
202 |
def correct_dash_usage(text):
    """Return ``text`` unchanged.

    This is the pre-change form of the function as shown on the removed
    side of the diff: it performs no normalization at all.
    """
    return text
|
204 |
|
205 |
def clean_message(message):
|
|
|
200 |
return completion.choices[0].message.content.strip()
|
201 |
|
202 |
# Shared pymorphy3 analyzer, created on first use and then reused, so the
# dictionaries are not reloaded on every call.
_MORPH_ANALYZER = None

# Sentence boundary: whitespace that follows ".", "!" or "?".
_SENTENCE_SPLIT_RE = re.compile(r'(?<=[.!?])\s+')

# Any of these (lowercase) patterns marks a short opening sentence as a greeting.
_GREETING_RE = re.compile('|'.join(f'(?:{pattern})' for pattern in (
    r"привет\b", r"здравствуй", r"добрый\s(день|вечер|утро)",
    r"дорогой\b", r"уважаемый\b", r"дорогая\b", r"уважаемая\b",
    r"господин\b", r"госпожа\b", r"друг\b", r"коллега\b",
    r"товарищ\b", r"приятель\b", r"подруга\b",
)))

# Sentences mentioning "никаких посещений/визитов" are dropped entirely.
_NO_VISITS_RE = re.compile(r'\bникаких\s+(посещений|визитов)\b', re.IGNORECASE)

# Ordered, case-insensitive word-level fix-ups: (pattern, replacement).
# Order matters: later rules rely on the abbreviations produced by earlier ones.
_WORD_FIXUPS = tuple(
    (re.compile(pattern, re.IGNORECASE), replacement)
    for pattern, replacement in (
        (r'\bИп\b', 'ИП'),
        (r'\bОоо\b', 'ООО'),
        (r'\bРф\b', 'РФ'),
        (r'\bпользовуйтесь\b', 'пользуйтесь'),
        (r'\bею\b', 'ей'),
        (r'\bповышьте\b', 'повысьте'),
        (r'\bСбербизнес\b', 'СберБизнес'),
        (r'\bСбербизнеса\b', 'СберБизнес'),
        (r'\bСбербизнесе\b', 'СберБизнес'),
        (r'\bСбербанк\b', 'СберБанк'),
        (r'\bвашего ООО\b', 'вашей компании'),
        # "₽" is not a word character, so a "\b" placed after it only matches
        # when a letter/digit follows directly; anchor on the digit instead.
        (r'(?<=\d)\s*₽', ' р'),
        (r'₽', 'р'),
        (r'\bруб\.(?=\W|$)', 'р'),
        (r'\bруб(?:ля|лей)\b', 'р'),
        # "рублей" has already been shortened to "р" by the rule above, so
        # swallow that "р" here to avoid producing "N 000 р р".
        # NOTE(review): as in the original, " р" is appended even when no
        # currency word follows the number — confirm this is intended.
        (r'(\d+)\s+тысяч[аи]?\b(?:\s+р\b)?', r'\1 000 р'),
        (r'(\d+)\s*тыс\.\s*руб\.', r'\1 000 р'),
        (r'(\d+)\s*тыс\.\s*р\.', r'\1 000 р'),
        (r'(\d+)\s*тыс\.\s*р', r'\1 000 р'),
        # Two separate rules: a single "A|B" alternation left group 1 empty
        # for the second branch and produced a stray leading space.
        (r'(\d+)\s+миллион(?:а|ов)\b', r'\1 млн'),
        (r'\bмиллионов\b', 'млн'),
        (r'(\d+)\s*млн\s*руб\.', r'\1 млн р'),
    )
)


def _get_morph_analyzer():
    """Return the shared ``pymorphy3.MorphAnalyzer``, creating it on first use."""
    global _MORPH_ANALYZER
    if _MORPH_ANALYZER is None:
        _MORPH_ANALYZER = pymorphy3.MorphAnalyzer()
    return _MORPH_ANALYZER


def _is_greeting_sentence(sentence, morph):
    """Return True if ``sentence`` has fewer than 5 words and looks like a greeting.

    The greeting patterns are tried against the lowercased sentence (needed
    for the multi-word "добрый день/вечер/утро" pattern, which can never
    match a single word) and against the normal form of every word.
    """
    words = sentence.split()
    if len(words) >= 5:
        return False
    if _GREETING_RE.search(sentence.lower()):
        return True
    return any(
        _GREETING_RE.search(morph.parse(word.lower())[0].normal_form)
        for word in words
    )


def _restore_yo(text, morph):
    """Replace each word with the spelling reported by the analyzer's top parse.

    Words that are entirely upper-case and the word "все" are kept as they
    are. If the original word starts with an upper-case letter, the
    replacement is capitalized. Words are split on whitespace and re-joined
    with single spaces, so any other whitespace (including newlines) is
    collapsed.

    NOTE(review): presumably this relies on pymorphy3 returning the "ё"
    spelling for words typed with "е" — confirm against the pymorphy3 docs.
    """
    restored_words = []
    for word in text.split():
        if word.isupper() or word.lower() == "все":
            restored_words.append(word)
            continue
        candidate = morph.parse(word)[0].word
        if word[0].isupper():
            candidate = candidate.capitalize()
        restored_words.append(candidate)
    return ' '.join(restored_words)


def _remove_specific_sentences(text):
    """Drop every sentence that contains "никаких посещений" / "никаких визитов"."""
    kept = [
        sentence
        for sentence in _SENTENCE_SPLIT_RE.split(text)
        if not _NO_VISITS_RE.search(sentence)
    ]
    return ' '.join(kept)


def correct_dash_usage(text):
    """Normalize typography and wording of a generated Russian message.

    Despite the name, this does more than fix dashes. In order, it:

    1. normalizes dashes (spaced dash -> em dash, dash inside a word ->
       hyphen, dash between digits -> en dash);
    2. converts paired straight quotes to «…», removes a lone straight
       quote, and strips one pair of quotes wrapping the whole text;
    3. expands "5k" / "5к" to "5 000";
    4. drops a short leading greeting sentence;
    5. re-spells words via pymorphy3 (see ``_restore_yo``);
    6. applies the ordered word/currency fix-ups in ``_WORD_FIXUPS``;
    7. normalizes "N р" spacing, folds "N 000 000 р" into "N млн р";
    8. removes sentences about "никаких посещений/визитов".

    Args:
        text: The message to clean up.

    Returns:
        The cleaned-up message.
    """
    morph = _get_morph_analyzer()

    # --- Dashes -----------------------------------------------------------
    text = re.sub(r'\s[-–—]\s', ' — ', text)
    # The hyphen rule must run BEFORE the numeric-range rule: its character
    # class includes digits, so running it last would turn every en dash
    # between digits back into a hyphen. "ё"/"Ё" lie outside "а-я"/"А-Я"
    # and are listed explicitly.
    text = re.sub(
        r'(?<=[a-zA-Zа-яА-ЯёЁ0-9])[-–—](?=[a-zA-Zа-яА-ЯёЁ0-9])', '-', text
    )
    text = re.sub(r'(?<=\d)[-–—](?=\d)', '–', text)

    # --- Quotes -----------------------------------------------------------
    text = re.sub(r'"([^\"]+)"', r'«\1»', text)
    if text.count('"') == 1:
        text = text.replace('"', '')
    # Strip one pair of quotes that wraps the whole text, but only when the
    # inner text has no further quote marks; otherwise «А» и «Б» would be
    # cut down to the unbalanced "А» и «Б".
    for opener, closer in (('"', '"'), ('«', '»')):
        if len(text) >= 2 and text.startswith(opener) and text.endswith(closer):
            inner = text[1:-1]
            if opener not in inner and closer not in inner:
                text = inner.strip()
            break

    # --- "5k" -> "5 000" --------------------------------------------------
    # The trailing \b keeps units such as "5кг" from becoming "5 000г".
    text = re.sub(r'(\d+)[kк]\b', r'\1 000', text, flags=re.IGNORECASE)

    # --- Leading greeting -------------------------------------------------
    sentences = _SENTENCE_SPLIT_RE.split(text)
    if sentences and _is_greeting_sentence(sentences[0], morph):
        sentences = sentences[1:]
    text = ' '.join(sentences)

    # --- Spelling and wording fix-ups ------------------------------------
    text = _restore_yo(text, morph)
    for pattern, replacement in _WORD_FIXUPS:
        text = pattern.sub(replacement, text)

    # --- Currency spacing -------------------------------------------------
    # Case-sensitive on purpose, as in the original: only lowercase "р".
    text = re.sub(r'(\d+)\s*р\b', r'\1 р', text)
    text = re.sub(
        r'\b(\d+)\s+000\s+000\s*р\b', r'\1 млн р', text, flags=re.IGNORECASE
    )
    text = re.sub(r' р р ', r' р ', text, flags=re.IGNORECASE)

    return _remove_specific_sentences(text)
|
289 |
|
290 |
def clean_message(message):
|