File size: 5,834 Bytes
cf38b6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# ssml_converter.py
"""

SSML (Speech Synthesis Markup Language) Converter

"""

import re
from typing import Dict, Optional
from datetime import datetime
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

class SSMLConverter:
    """Convert plain text to SSML (Speech Synthesis Markup Language).

    The converter scans the input for currency amounts, times, dates,
    alphanumeric codes, numbers and pause markers (``...`` / ``--``), wraps
    each of them in the matching ``<say-as>`` or ``<break>`` element, and
    keeps the surrounding plain text in its original position.
    """

    # Detection patterns. Order matters: at any given position the first
    # alternative that matches wins, so the more specific types (currency,
    # time, date, code) are listed before the generic ``number``.
    _SEGMENT_PATTERNS = {
        # Trailing \b keeps e.g. "5 USDT" from being read as "5 USD" + "T".
        'currency': r'[₺$€£]\s*\d+(?:[.,]\d+)?|\d+(?:[.,]\d+)?\s*(?:TL|USD|EUR|GBP)\b',
        'time': r'\b\d{1,2}:\d{2}(?::\d{2})?\b',
        'date': r'\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b',
        'code': r'\b[A-Z]{2,5}\d{2,5}\b',
        'number': r'\b\d+(?:[.,]\d+)?\b',
        'pause': r'\.{3}|--',
    }

    # One named group per segment type; compiled once for all instances.
    _SEGMENT_RE = re.compile(
        '|'.join(
            f'(?P<{name}>{pattern})'
            for name, pattern in _SEGMENT_PATTERNS.items()
        )
    )

    # Segment type -> value of the say-as "interpret-as" attribute.
    _INTERPRET_AS = {
        'number': 'cardinal',
        'currency': 'currency',
        'time': 'time',
        'date': 'date',
        'code': 'characters',
    }

    _ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

    # Languages whose numeric dates are written month-first.
    _MONTH_FIRST_LANGUAGES = frozenset({'en-US'})

    def __init__(self, language: str = "tr-TR"):
        """Create a converter.

        Args:
            language: Value written to the ``xml:lang`` attribute of the
                root ``<speak>`` element; also used to pick the field
                order for non-ISO numeric dates.
        """
        self.language = language

    def convert_to_ssml(self, text: str, options: Optional[Dict[str, object]] = None) -> str:
        """Convert plain text to an SSML document.

        Args:
            text: Plain text to convert.
            options: Currently unused; accepted for backward compatibility.

        Returns:
            The serialized ``<speak>`` element. Text and detected elements
            appear in the same order as in ``text``; XML special characters
            in the text are escaped exactly once (by the serializer).
        """
        return ET.tostring(self._build_tree(text), encoding='unicode', method='xml')

    def _build_tree(self, text: str) -> ET.Element:
        """Build the ``<speak>`` element tree for ``text``."""
        speak = ET.Element("speak")
        speak.set("version", "1.0")
        speak.set("xml:lang", self.language)

        for segment in self._segment_text(text):
            kind = segment["type"]

            if kind == "plain":
                # Whitespace-only runs between detected elements are dropped.
                # No manual escaping here: ElementTree escapes on output, so
                # escaping now would produce "&amp;amp;".
                if segment["text"].strip():
                    self._append_text(speak, segment["text"])

            elif kind == "pause":
                break_elem = ET.SubElement(speak, "break")
                break_elem.set("time", segment["duration"])

            elif kind in self._INTERPRET_AS:
                say_as = ET.SubElement(speak, "say-as")
                say_as.set("interpret-as", self._INTERPRET_AS[kind])
                fmt = self._say_as_format(kind, segment["text"])
                if fmt is not None:
                    say_as.set("format", fmt)
                say_as.text = segment["text"]

        return speak

    def _say_as_format(self, kind: str, value: str) -> Optional[str]:
        """Return the say-as ``format`` attribute for a segment, if any."""
        if kind == "time":
            return "hms24"
        if kind == "date":
            # ISO dates are year-first; the other accepted shape
            # (d.m.y / d/m/y) is day-first except in month-first locales.
            if self._ISO_DATE_RE.fullmatch(value):
                return "ymd"
            if self.language in self._MONTH_FIRST_LANGUAGES:
                return "mdy"
            return "dmy"
        return None

    @staticmethod
    def _append_text(parent: ET.Element, text: Optional[str]) -> None:
        """Append ``text`` at the current end of ``parent``'s content.

        ElementTree stores text that follows a child element in that
        child's ``tail``; only text before the first child lives in
        ``parent.text``.
        """
        if not text:
            return
        if len(parent):
            last_child = parent[-1]
            last_child.tail = (last_child.tail or "") + text
        else:
            parent.text = (parent.text or "") + text

    def _segment_text(self, text: str) -> list:
        """Split ``text`` into typed segments for SSML processing.

        Returns:
            A list of dicts in document order. Each dict has a ``'type'``
            key (``'plain'``, ``'currency'``, ``'time'``, ``'date'``,
            ``'code'``, ``'number'`` or ``'pause'``). Pause segments carry
            a ``'duration'`` (``'500ms'`` for ``...``, ``'1s'`` for ``--``);
            all other segments carry the matched ``'text'``.
        """
        segments = []
        last_end = 0

        for match in self._SEGMENT_RE.finditer(text):
            # Plain text between the previous match and this one.
            if match.start() > last_end:
                segments.append({
                    'type': 'plain',
                    'text': text[last_end:match.start()],
                })

            # The patterns contain no nested capturing groups, so lastgroup
            # is the name of the alternative that matched.
            type_name = match.lastgroup
            matched = match.group()
            if type_name == 'pause':
                segments.append({
                    'type': 'pause',
                    'duration': '500ms' if matched == '...' else '1s',
                })
            else:
                segments.append({'type': type_name, 'text': matched})

            last_end = match.end()

        # Remaining text after the final match.
        if last_end < len(text):
            segments.append({
                'type': 'plain',
                'text': text[last_end:],
            })

        return segments

    def add_emphasis(self, text: str, words: list, level: str = "moderate") -> str:
        """Convert ``text`` to SSML and wrap the given words in ``<emphasis>``.

        Matching is a case-sensitive substring match against the plain-text
        parts of the document; every occurrence is emphasized. Content of
        ``<say-as>`` elements is left untouched. Empty strings in ``words``
        are ignored, and when one word is a prefix of another the longer
        one wins.

        Args:
            text: Plain text to convert.
            words: Strings to emphasize.
            level: Value of the ``level`` attribute on each ``<emphasis>``.

        Returns:
            The serialized ``<speak>`` element.
        """
        root = self._build_tree(text)

        # Longest first so that "New York" is preferred over "New".
        needles = sorted((w for w in words if w), key=len, reverse=True)
        if not needles:
            return ET.tostring(root, encoding='unicode', method='xml')
        matcher = re.compile('|'.join(re.escape(w) for w in needles))

        # Build a fresh root instead of mutating the tree while walking it:
        # inserting children during iteration makes the walk visit the new
        # <emphasis> nodes and re-match them without end.
        rebuilt = ET.Element(root.tag, dict(root.attrib))
        self._append_emphasized(rebuilt, root.text, matcher, level)
        for child in list(root):
            trailing = child.tail
            child.tail = None
            rebuilt.append(child)
            self._append_emphasized(rebuilt, trailing, matcher, level)

        return ET.tostring(rebuilt, encoding='unicode', method='xml')

    def _append_emphasized(self, parent: ET.Element, text: Optional[str],
                           matcher: "re.Pattern", level: str) -> None:
        """Append ``text`` to ``parent``, wrapping matches in ``<emphasis>``."""
        if not text:
            return
        cursor = 0
        for hit in matcher.finditer(text):
            self._append_text(parent, text[cursor:hit.start()])
            emphasis = ET.SubElement(parent, "emphasis")
            emphasis.set("level", level)
            emphasis.text = hit.group()
            cursor = hit.end()
        self._append_text(parent, text[cursor:])