Spaces:
Building
Building
File size: 5,834 Bytes
cf38b6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# ssml_converter.py
"""
SSML (Speech Synthesis Markup Language) Converter
"""
import re
from typing import Dict, Optional
from datetime import datetime
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape
class SSMLConverter:
    """Convert plain text to SSML (Speech Synthesis Markup Language).

    The converter scans the input for currency amounts, times, dates,
    alphanumeric codes, bare numbers and pause markers (``...`` / ``--``),
    wraps each in the matching ``<say-as>`` / ``<break>`` element, and
    leaves everything else as plain text inside a single ``<speak>`` root.
    """

    # Token patterns. Their order matters: at any given position the regex
    # alternation picks the first alternative that matches, so the more
    # specific kinds (currency, time, date, code) must precede 'number'.
    _TOKEN_PATTERNS = {
        'currency': r'[₺$€£]\s*\d+(?:[.,]\d+)?|\d+(?:[.,]\d+)?\s*(?:TL|USD|EUR|GBP)',
        'time': r'\b\d{1,2}:\d{2}(?::\d{2})?\b',
        'date': r'\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b',
        'code': r'\b[A-Z]{2,5}\d{2,5}\b',
        'number': r'\b\d+(?:[.,]\d+)?\b',
        'pause': r'\.{3}|--',
    }

    # Compiled once for the class instead of being rebuilt on every call.
    _TOKEN_RE = re.compile(
        '|'.join(
            f'(?P<{name}>{pattern})'
            for name, pattern in _TOKEN_PATTERNS.items()
        )
    )

    # Distinguishes ISO dates (year first) from the day/month/year shapes
    # that the 'date' token pattern also accepts.
    _ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

    # Fixed <say-as> attributes per segment type ('date' is handled
    # separately because its format depends on the matched text).
    _SAY_AS_ATTRS = {
        'number': {'interpret-as': 'cardinal'},
        'currency': {'interpret-as': 'currency'},
        'time': {'interpret-as': 'time', 'format': 'hms24'},
        'code': {'interpret-as': 'characters'},
    }

    def __init__(self, language: str = "tr-TR"):
        """Create a converter.

        Args:
            language: Value written to the ``xml:lang`` attribute of the
                ``<speak>`` root element.
        """
        self.language = language

    def convert_to_ssml(
        self, text: str, options: Optional[Dict[str, object]] = None
    ) -> str:
        """Convert plain text to an SSML document string.

        Args:
            text: Plain input text. ``None`` or ``""`` yields an empty
                ``<speak>`` element.
            options: Accepted for interface compatibility; it is not read
                by the current implementation.

        Returns:
            The serialized ``<speak>`` element as a ``str``.
        """
        return ET.tostring(self._build_tree(text), encoding='unicode', method='xml')

    def _build_tree(self, text: str) -> ET.Element:
        """Build the ``<speak>`` element tree for *text* in document order."""
        speak = ET.Element("speak")
        speak.set("version", "1.0")
        speak.set("xml:lang", self.language)

        for segment in self._segment_text(text):
            kind = segment["type"]

            if kind == "plain":
                # Whitespace-only runs between tokens are dropped.
                # The text is NOT pre-escaped: ElementTree escapes on
                # serialization, and escaping twice would turn "&" into
                # "&amp;amp;".
                if segment["text"].strip():
                    self._append_text(speak, segment["text"])
            elif kind == "pause":
                pause = ET.SubElement(speak, "break")
                pause.set("time", segment["duration"])
            elif kind == "date":
                say_as = ET.SubElement(speak, "say-as")
                say_as.set("interpret-as", "date")
                # ISO dates are year-first. The other accepted shapes
                # (e.g. 15.01.2024, 15/01/2024) are tagged day-first.
                # NOTE(review): day-first is assumed for non-ISO dates;
                # confirm for locales that write month first.
                is_iso = self._ISO_DATE_RE.fullmatch(segment["text"])
                say_as.set("format", "ymd" if is_iso else "dmy")
                say_as.text = segment["text"]
            elif kind in self._SAY_AS_ATTRS:
                say_as = ET.SubElement(speak, "say-as")
                for attr_name, attr_value in self._SAY_AS_ATTRS[kind].items():
                    say_as.set(attr_name, attr_value)
                say_as.text = segment["text"]

        return speak

    @staticmethod
    def _append_text(parent: ET.Element, text: str) -> None:
        """Append *text* after whatever *parent* currently ends with.

        ElementTree stores text that follows a child element in that
        child's ``tail``; only text preceding the first child lives in
        ``parent.text``.
        """
        if not text:
            return
        if len(parent):
            last_child = parent[-1]
            last_child.tail = (last_child.tail or "") + text
        else:
            parent.text = (parent.text or "") + text

    def _segment_text(self, text: str) -> list:
        """Split *text* into typed segments for SSML generation.

        Returns:
            A list of dicts in input order. Each has a ``'type'`` key
            (``'plain'``, ``'currency'``, ``'time'``, ``'date'``,
            ``'code'``, ``'number'`` or ``'pause'``). Pause segments carry
            ``'duration'``; every other segment carries ``'text'``.
        """
        if not text:
            return []

        segments = []
        last_end = 0
        for match in self._TOKEN_RE.finditer(text):
            # Plain text between the previous token and this one.
            if match.start() > last_end:
                segments.append({
                    'type': 'plain',
                    'text': text[last_end:match.start()],
                })

            # The named groups are the top-level alternatives and contain
            # no nested capturing groups, so lastgroup names the one that
            # matched.
            type_name = match.lastgroup
            token = match.group(0)
            if type_name == 'pause':
                segments.append({
                    'type': 'pause',
                    'duration': '500ms' if token == '...' else '1s',
                })
            else:
                segments.append({'type': type_name, 'text': token})
            last_end = match.end()

        # Trailing plain text after the final token.
        if last_end < len(text):
            segments.append({'type': 'plain', 'text': text[last_end:]})
        return segments

    def add_emphasis(self, text: str, words: list, level: str = "moderate") -> str:
        """Convert *text* to SSML and wrap given words in ``<emphasis>``.

        Every occurrence of each entry of *words* found in the plain-text
        parts of the document is wrapped (substring match, case-sensitive).
        Text inside ``<say-as>`` elements is left untouched.

        Args:
            text: Plain input text.
            words: Strings to emphasize. Empty strings are ignored.
            level: Value for the ``level`` attribute of each ``<emphasis>``.

        Returns:
            The serialized ``<speak>`` element as a ``str``.
        """
        root = self._build_tree(text)

        targets = [word for word in (words or []) if word]
        if not targets:
            return ET.tostring(root, encoding='unicode', method='xml')

        # Longest first, so a word wins over any of its own prefixes.
        targets.sort(key=len, reverse=True)
        matcher = re.compile('|'.join(re.escape(word) for word in targets))

        # Rebuild into a fresh root rather than mutating the tree while
        # walking it: new <emphasis> nodes must never be re-scanned, and
        # each must land at the position of the text it replaces.
        emphasized = ET.Element(root.tag, dict(root.attrib))
        self._append_emphasized(emphasized, root.text or "", matcher, level)
        for child in list(root):
            trailing = child.tail or ""
            child.tail = None
            emphasized.append(child)
            self._append_emphasized(emphasized, trailing, matcher, level)

        return ET.tostring(emphasized, encoding='unicode', method='xml')

    def _append_emphasized(self, parent: ET.Element, text: str, matcher, level: str) -> None:
        """Append *text* to *parent*, wrapping each *matcher* hit in ``<emphasis>``."""
        cursor = 0
        for hit in matcher.finditer(text):
            self._append_text(parent, text[cursor:hit.start()])
            emphasis = ET.SubElement(parent, "emphasis")
            emphasis.set("level", level)
            emphasis.text = hit.group(0)
            cursor = hit.end()
        self._append_text(parent, text[cursor:])