ciyidogan committed on
Commit
524896a
·
verified ·
1 Parent(s): cf38b6d

Delete ssml_converter.py

Browse files
Files changed (1) hide show
  1. ssml_converter.py +0 -151
ssml_converter.py DELETED
@@ -1,151 +0,0 @@
1
- # ssml_converter.py
2
- """
3
- SSML (Speech Synthesis Markup Language) Converter
4
- """
5
-
6
- import re
7
- from typing import Dict, Optional
8
- from datetime import datetime
9
- import xml.etree.ElementTree as ET
10
- from xml.sax.saxutils import escape
11
-
12
class SSMLConverter:
    """Convert plain text to SSML (Speech Synthesis Markup Language).

    The converter scans the text for currency amounts, times, dates, codes,
    numbers and pause markers, wraps each of them in the matching SSML
    element and keeps all remaining text, in its original position, as plain
    character data of the root ``<speak>`` element.
    """

    # Order matters: at any position the first alternative that matches wins,
    # so the more specific patterns must come before the generic "number".
    _PATTERNS = (
        ("currency",
         r"[₺$€£]\s*\d+(?:[.,]\d+)?|\d+(?:[.,]\d+)?\s*(?:TL|USD|EUR|GBP)\b"),
        ("time", r"\b\d{1,2}:\d{2}(?::\d{2})?\b"),
        ("date", r"\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b"),
        ("code", r"\b[A-Z]{2,5}\d{2,5}\b"),
        ("number", r"\b\d+(?:[.,]\d+)?\b"),
        ("pause", r"\.{3}|--"),
    )

    # One named group per content type; the inner patterns only use
    # non-capturing groups, so ``match.lastgroup`` identifies the type.
    _SEGMENT_RE = re.compile(
        "|".join(f"(?P<{name}>{pattern})" for name, pattern in _PATTERNS)
    )

    _ISO_DATE_RE = re.compile(r"\d{4}-\d{2}-\d{2}")

    # Languages whose numeric dates are written month-first.
    # NOTE(review): only en-US is listed; extend if other locales need it.
    _MDY_LANGUAGES = frozenset({"en-US"})

    # Attributes of the <say-as> element for each segment type
    # ("date" additionally receives a computed "format" attribute).
    _SAY_AS_ATTRS = {
        "number": {"interpret-as": "cardinal"},
        "currency": {"interpret-as": "currency"},
        "time": {"interpret-as": "time", "format": "hms24"},
        "date": {"interpret-as": "date"},
        "code": {"interpret-as": "characters"},
    }

    def __init__(self, language: str = "tr-TR"):
        """Store the language tag written to ``xml:lang`` of ``<speak>``."""
        self.language = language

    def convert_to_ssml(
        self, text: str, options: Optional[Dict[str, object]] = None
    ) -> str:
        """Convert plain text to an SSML document.

        Args:
            text: Plain text to convert. ``None`` or an empty string yields
                an empty ``<speak>`` element.
            options: Accepted for interface compatibility; currently unused.

        Returns:
            The serialized ``<speak>`` element as a string.
        """
        speak = ET.Element("speak")
        speak.set("version", "1.0")
        speak.set("xml:lang", self.language)

        for segment in self._segment_text(text or ""):
            kind = segment["type"]
            if kind == "plain":
                # ElementTree escapes character data on serialization, so
                # the raw text is stored here (escaping it first would emit
                # "&amp;amp;"). Whitespace-only text is kept so neighbouring
                # tokens stay separated.
                self._append_text(speak, segment["text"])
            elif kind == "pause":
                ET.SubElement(speak, "break", {"time": segment["duration"]})
            else:
                attrs = dict(self._SAY_AS_ATTRS[kind])
                if kind == "date":
                    attrs["format"] = self._date_format(segment["text"])
                say_as = ET.SubElement(speak, "say-as", attrs)
                say_as.text = segment["text"]

        return ET.tostring(speak, encoding="unicode", method="xml")

    def _date_format(self, date_text: str) -> str:
        """Return the say-as ``format`` value matching a detected date."""
        if self._ISO_DATE_RE.fullmatch(date_text):
            return "ymd"
        # Non-ISO numeric dates (15.03.2024, 15/03/24) are day-first except
        # in the month-first locales listed above.
        return "mdy" if self.language in self._MDY_LANGUAGES else "dmy"

    @staticmethod
    def _append_text(parent, text) -> None:
        """Append character data to ``parent`` after its current content.

        Text following a child element belongs in that child's ``tail``;
        only text preceding every child goes into ``parent.text``.
        """
        if not text:
            return
        if len(parent):
            last_child = parent[-1]
            last_child.tail = (last_child.tail or "") + text
        else:
            parent.text = (parent.text or "") + text

    def _segment_text(self, text: str) -> list:
        """Split text into typed segments for SSML processing.

        Returns:
            A list of dicts in source order. Each dict has a ``type`` key
            (``plain``, ``currency``, ``time``, ``date``, ``code``,
            ``number`` or ``pause``); ``pause`` segments carry a
            ``duration`` key, all others a ``text`` key.
        """
        segments = []
        cursor = 0

        for match in self._SEGMENT_RE.finditer(text):
            if match.start() > cursor:
                segments.append(
                    {"type": "plain", "text": text[cursor:match.start()]}
                )

            kind = match.lastgroup
            if kind == "pause":
                # "..." is a short pause, "--" a long one.
                duration = "500ms" if match.group() == "..." else "1s"
                segments.append({"type": "pause", "duration": duration})
            else:
                segments.append({"type": kind, "text": match.group()})

            cursor = match.end()

        if cursor < len(text):
            segments.append({"type": "plain", "text": text[cursor:]})

        return segments

    def add_emphasis(self, text: str, words: list, level: str = "moderate") -> str:
        """Convert text to SSML and wrap the given words in ``<emphasis>``.

        Every occurrence of each word (case-sensitive substring match) in
        the plain text of the document is emphasized. Text inside
        ``<say-as>`` elements is left untouched.

        Args:
            text: Plain text to convert.
            words: Words to emphasize; empty strings are ignored.
            level: Value of the ``level`` attribute of ``<emphasis>``.

        Returns:
            The serialized ``<speak>`` element as a string.
        """
        ssml_text = self.convert_to_ssml(text)

        # Longest words first so a word is preferred over its own prefix.
        targets = sorted({w for w in (words or []) if w}, key=len, reverse=True)
        if not targets:
            return ssml_text
        word_re = re.compile("|".join(re.escape(w) for w in targets))

        root = ET.fromstring(ssml_text)

        # Build a fresh tree instead of mutating the one being walked:
        # inserting elements while iterating would revisit the new
        # <emphasis> nodes and nest them without end.
        rebuilt = ET.Element(root.tag, dict(root.attrib))
        self._append_emphasized(rebuilt, root.text, word_re, level)
        for child in list(root):
            tail = child.tail
            child.tail = None
            rebuilt.append(child)
            self._append_emphasized(rebuilt, tail, word_re, level)

        return ET.tostring(rebuilt, encoding="unicode", method="xml")

    def _append_emphasized(self, parent, text, word_re, level: str) -> None:
        """Append ``text`` to ``parent``, wrapping matches in ``<emphasis>``."""
        if not text:
            return
        cursor = 0
        for match in word_re.finditer(text):
            self._append_text(parent, text[cursor:match.start()])
            emphasis = ET.SubElement(parent, "emphasis", {"level": level})
            emphasis.text = match.group()
            cursor = match.end()
        self._append_text(parent, text[cursor:])