NitinBot001 committed
Commit f5ec497 · verified · 1 Parent(s): c0974ef

Upload 26 files

.env.example ADDED
@@ -0,0 +1,10 @@
+ # Server configuration
+ HOST=0.0.0.0
+ PORT=7000
+
+ # SSL configuration
+ VERIFY_SSL=true
+
+ # Flask configuration
+ FLASK_ENV=production
+ FLASK_APP=app.py
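
These values are consumed at runtime through python-dotenv. A minimal sketch of the loading pattern (the same `load_dotenv()`/`os.getenv()` calls used in `ttsfm-web/app.py`, with the defaults taken from this example file):

```python
# Minimal sketch of loading this .env file; assumes python-dotenv is installed.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

HOST = os.getenv("HOST", "0.0.0.0")
PORT = int(os.getenv("PORT", "7000"))
VERIFY_SSL = os.getenv("VERIFY_SSL", "true").lower() == "true"

print(f"Binding to {HOST}:{PORT} (verify SSL: {VERIFY_SSL})")
```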
.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,38 @@
+ ---
+ name: Bug report
+ about: Create a report to help us improve
+ title: ''
+ labels: ''
+ assignees: ''
+
+ ---
+
+ **Describe the bug**
+ A clear and concise description of what the bug is.
+
+ **To Reproduce**
+ Steps to reproduce the behavior:
+ 1. Go to '...'
+ 2. Click on '....'
+ 3. Scroll down to '....'
+ 4. See error
+
+ **Expected behavior**
+ A clear and concise description of what you expected to happen.
+
+ **Screenshots**
+ If applicable, add screenshots to help explain your problem.
+
+ **Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+ **Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+ **Additional context**
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
+ ---
+ name: Feature request
+ about: Suggest an idea for this project
+ title: ''
+ labels: ''
+ assignees: ''
+
+ ---
+
+ **Is your feature request related to a problem? Please describe.**
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+ **Describe the solution you'd like**
+ A clear and concise description of what you want to happen.
+
+ **Describe alternatives you've considered**
+ A clear and concise description of any alternative solutions or features you've considered.
+
+ **Additional context**
+ Add any other context or screenshots about the feature request here.
.github/workflows/docker-build.yml ADDED
@@ -0,0 +1,78 @@
+ name: Docker Build and Push
+
+ on:
+   release:
+     types: [published]
+
+ env:
+   REGISTRY_DOCKERHUB: docker.io
+   REGISTRY_GHCR: ghcr.io
+   IMAGE_NAME: ${{ github.repository }}
+
+ jobs:
+   build-and-push:
+     runs-on: ubuntu-latest
+     permissions:
+       contents: read
+       packages: write
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v4
+
+       - name: Set up QEMU
+         uses: docker/setup-qemu-action@v3
+
+       - name: Set up Docker Buildx
+         uses: docker/setup-buildx-action@v3
+         with:
+           driver: docker-container
+
+       - name: Login to Docker Hub
+         uses: docker/login-action@v3
+         with:
+           username: ${{ secrets.DOCKERHUB_USERNAME }}
+           password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+       - name: Login to GitHub Container Registry
+         uses: docker/login-action@v3
+         with:
+           registry: ${{ env.REGISTRY_GHCR }}
+           username: ${{ github.actor }}
+           password: ${{ secrets.GITHUB_TOKEN }}
+
+       - name: Extract metadata
+         id: meta
+         uses: docker/metadata-action@v5
+         with:
+           images: |
+             ${{ secrets.DOCKERHUB_USERNAME }}/ttsfm
+             ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
+           tags: |
+             type=ref,event=tag
+             type=semver,pattern={{version}}
+             type=semver,pattern={{major}}.{{minor}}
+             type=semver,pattern={{major}}
+             type=raw,value=latest
+           labels: |
+             org.opencontainers.image.source=${{ github.repositoryUrl }}
+             org.opencontainers.image.description=Free TTS API server compatible with OpenAI's TTS API format using openai.fm
+             org.opencontainers.image.licenses=MIT
+             org.opencontainers.image.title=TTSFM - Free TTS API Server
+             org.opencontainers.image.vendor=dbcccc
+
+       - name: Build and push
+         id: build-and-push
+         uses: docker/build-push-action@v5
+         with:
+           context: .
+           platforms: linux/amd64,linux/arm64
+           push: true
+           tags: ${{ steps.meta.outputs.tags }}
+           labels: ${{ steps.meta.outputs.labels }}
+           cache-from: type=gha
+           cache-to: type=gha,mode=max
+
+       - name: Show image info
+         run: |
+           echo "Pushed tags: ${{ steps.meta.outputs.tags }}"
+           echo "Image digest: ${{ steps.build-and-push.outputs.digest }}"
.github/workflows/release.yml ADDED
@@ -0,0 +1,90 @@
+ name: Release and Publish
+
+ on:
+   push:
+     tags:
+       - 'v*'  # Triggers on version tags like v1.0.0, v3.0.1, etc.
+
+ permissions:
+   contents: write
+   id-token: write
+
+ jobs:
+   release-and-publish:
+     runs-on: ubuntu-latest
+
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: '3.11'
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install build twine
+
+       - name: Test package import
+         run: |
+           pip install -e .
+           python -c "import ttsfm; print(f'✅ TTSFM imported successfully')"
+           python -c "from ttsfm import TTSClient; print('✅ TTSClient imported successfully')"
+
+       - name: Build package
+         run: |
+           python -m build
+           echo "📦 Package built successfully"
+           ls -la dist/
+
+       - name: Check package
+         run: |
+           twine check dist/*
+           echo "✅ Package validation passed"
+
+       - name: Publish to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           password: ${{ secrets.PYPI_API_TOKEN }}
+
+       - name: Create GitHub Release
+         uses: softprops/action-gh-release@v1
+         with:
+           body: |
+             ## 🎉 TTSFM ${{ github.ref_name }}
+
+             New release of TTSFM - Free Text-to-Speech API with OpenAI compatibility.
+
+             ### 📦 Installation
+             ```bash
+             pip install ttsfm==${{ github.ref_name }}
+             ```
+
+             ### 🚀 Quick Start
+             ```python
+             from ttsfm import TTSClient
+
+             client = TTSClient()
+             response = client.generate_speech("Hello from TTSFM!")
+             response.save_to_file("hello")
+             ```
+
+             ### 🐳 Docker
+             ```bash
+             docker run -p 8000:8000 dbcccc/ttsfm:latest
+             ```
+
+             ### ✨ Features
+             - 🆓 Completely free (uses openai.fm service)
+             - 🎯 OpenAI-compatible API
+             - 🗣️ 11 voices available
+             - 🎵 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
+             - ⚡ Async and sync clients
+             - 🌐 Web interface included
+             - 🔧 CLI tool available
+
+             ### 📚 Documentation
+             See [README](https://github.com/dbccccccc/ttsfm#readme) for full documentation.
+           draft: false
+           prerelease: false
.gitignore ADDED
@@ -0,0 +1,156 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+ .venv/
+
+ # Environment variables
+ .env
+ .env.local
+ .env.production
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .spyderproject
+ .spyproject
+
+ # OS
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Generated audio files (for testing)
+ *.mp3
+ *.wav
+ *.opus
+ *.aac
+ *.flac
+ *.pcm
+ test_output.*
+ output.*
+ hello.*
+ speech.*
+
+ # Logs
+ *.log
+ logs/
+ .pytest_cache/
+
+ # Temporary files
+ tmp/
+ temp/
+ .tmp/
+
+ # Coverage reports
+ htmlcov/
+ .coverage
+ .coverage.*
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Documentation builds
+ docs/_build/
+ site/
+
+ # Package builds
+ *.tar.gz
+ *.whl
+ dist/
+ build/
+
+ # MyPy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ Pipfile.lock
+
+ # PEP 582
+ __pypackages__/
+
+ # Celery
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # Pyre type checker
+ .pyre/
+
+ # Additional exclusions for GitHub
+
+ # API Keys and Secrets
+ config.json
+ secrets.json
+ .secrets
+ api_keys.txt
+
+ # Database files
+ *.db
+ *.sqlite
+ *.sqlite3
+
+ # Backup files
+ *.bak
+ *.backup
+ *~
+
+ # Node.js (if using any JS tools)
+ node_modules/
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+
+ # Docker
+ .dockerignore
+ Dockerfile.dev
+ docker-compose.override.yml
+
+ # Local configuration
+ local_settings.py
+ local_config.py
CHANGELOG.md ADDED
@@ -0,0 +1,191 @@
+ # Changelog
+
+ All notable changes to this project will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [3.1.0] - 2024-12-19
+
+ ### 🔧 Format Support Improvements
+
+ This release focuses on fixing audio format handling and improving format delivery optimization.
+
+ ### ✨ Added
+
+ - **Smart Header Selection**: Intelligent HTTP header selection to optimize format delivery from openai.fm service
+ - **Format Mapping Functions**: Helper functions for better format handling and optimization
+ - **Enhanced Web Interface**: Improved format selection with detailed descriptions for each format
+ - **Comprehensive Format Documentation**: Updated README and documentation with complete format information
+
+ ### 🔄 Changed
+
+ - **File Naming Logic**: Files are now saved with extensions based on the actual returned format, not the requested format
+ - **Enhanced Logging**: Added format-specific log messages for better debugging
+ - **Web API Enhancement**: `/api/formats` endpoint now provides detailed information about all supported formats
+ - **Documentation Updates**: README and package documentation now include comprehensive format guides
+
+ ### 🐛 Fixed
+
+ - **MAJOR FIX**: Resolved file naming issue where files were saved with incorrect double extensions (e.g., `test.wav.mp3`, `test.opus.wav`)
+ - **Correct File Extensions**: Files now save with proper single extensions based on actual audio format (e.g., `test.mp3`, `test.wav`)
+ - **Format Optimization**: Improved format delivery through smart request optimization
+ - **Format Handling**: Better handling of all supported audio formats
+
+ ### 📝 Technical Details
+
+ - **Format Optimization**: Smart request optimization to deliver the best quality for each format
+ - **Backward Compatibility**: Existing code continues to work unchanged
+ - **Enhanced Format Support**: Improved support for all 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
+
+ ## [3.0.0] - 2025-06-06
+
+ ### 🎉 First Python Package Release
+
+ This is the first release of TTSFM as an installable Python package. Previous versions (v1.x and v2.x) were service-only releases that provided the API server but not a pip-installable package.
+
+ ### ✨ Added
+
+ - **Complete Package Restructure**: Modern Python package structure with proper typing
+ - **Async Support**: Full asynchronous client implementation with `asyncio`
+ - **OpenAI API Compatibility**: Drop-in replacement for OpenAI TTS API
+ - **Type Hints**: Complete type annotation support throughout the codebase
+ - **CLI Interface**: Command-line tool for easy TTS generation
+ - **Web Application**: Optional Flask-based web interface
+ - **Docker Support**: Multi-architecture Docker images (linux/amd64, linux/arm64)
+ - **Comprehensive Error Handling**: Detailed exception hierarchy
+ - **Multiple Audio Formats**: Support for MP3, WAV, FLAC, and more
+ - **Voice Options**: Multiple voice models (alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer)
+ - **Text Processing**: Automatic text length validation and splitting
+ - **Rate Limiting**: Built-in rate limiting and retry mechanisms
+ - **Configuration**: Environment variable and configuration file support
+
+ ### 🔧 Technical Improvements
+
+ - **Modern Build System**: Using `pyproject.toml` with setuptools
+ - **GitHub Actions**: Automated Docker builds and PyPI publishing
+ - **Development Tools**: Pre-commit hooks, linting, testing setup
+ - **Documentation**: Comprehensive README and inline documentation
+ - **Package Management**: Proper dependency management with optional extras
+
+ ### 🌐 API Changes
+
+ - **Breaking**: Complete API redesign for better usability
+ - **OpenAI Compatible**: `/v1/audio/speech` endpoint compatibility
+ - **RESTful Design**: Clean REST API design
+ - **Health Checks**: Built-in health check endpoints
+ - **CORS Support**: Cross-origin resource sharing enabled
+
+ ### 📦 Installation Options
+
+ ```bash
+ # Basic installation
+ pip install ttsfm
+
+ # With web application support
+ pip install ttsfm[web]
+
+ # With development tools
+ pip install ttsfm[dev]
+
+ # Docker
+ docker run -p 8000:8000 ghcr.io/dbccccccc/ttsfm:latest
+ ```
+
+ ### 🚀 Quick Start
+
+ ```python
+ from ttsfm import TTSClient, Voice
+
+ client = TTSClient()
+ response = client.generate_speech(
+     text="Hello! This is TTSFM v3.0.0",
+     voice=Voice.CORAL
+ )
+
+ with open("speech.mp3", "wb") as f:
+     f.write(response.audio_data)
+ ```
+
+ ### 📦 Package vs Service History
+
+ **Important Note**: This v3.0.0 is the first release of TTSFM as a Python package available on PyPI. Previous versions (v1.x and v2.x) were service/API server releases only and were not available as installable packages.
+
+ - **v1.x - v2.x**: Service releases (API server only, not pip-installable)
+ - **v3.0.0+**: Full Python package releases (pip-installable with service capabilities)
+
+ ### 🐛 Bug Fixes
+
+ - Fixed Docker build issues with dependency resolution
+ - Improved error handling and user feedback
+ - Better handling of long text inputs
+ - Enhanced stability and performance
+
+ ### 📚 Documentation
+
+ - Complete API documentation
+ - Usage examples and tutorials
+ - Docker deployment guide
+ - Development setup instructions
+
+ ---
+
+ ## Previous Service Releases (Not Available as Python Packages)
+
+ The following versions were service/API server releases only and were not available as pip-installable packages:
+
+ ### [2.0.0-alpha9] - 2025-04-09
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha8] - 2025-04-09
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha7] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha6] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha5] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha4] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha3] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha2] - 2025-04-07
+ - Service improvements (alpha release)
+
+ ### [2.0.0-alpha1] - 2025-04-07
+ - Alpha release (DO NOT USE)
+
+ ### [1.3.0] - 2025-03-28
+ - Support for additional audio file formats in the API
+ - Alignment with formats supported by the official API
+
+ ### [1.2.2] - 2025-03-28
+ - Fixed Docker support
+
+ ### [1.2.1] - 2025-03-28
+ - Color change for the status indicator
+ - Voice preview on webpage for each voice
+
+ ### [1.2.0] - 2025-03-26
+ - Enhanced stability and availability by implementing advanced request handling mechanisms
+ - Removed the proxy pool
+
+ ### [1.1.2] - 2025-03-26
+ - Version display on webpage
+ - Last version of 1.1.x
+
+ ### [1.1.1] - 2025-03-26
+ - Build fixes
+
+ ### [1.1.0] - 2025-03-26
+ - Project restructuring for better future development experiences
+ - Added .env settings
+
+ ### [1.0.0] - 2025-03-26
+ - First service release
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ ENV PYTHONDONTWRITEBYTECODE=1 \
+     PYTHONUNBUFFERED=1 \
+     PORT=8000
+
+ # Install dependencies
+ RUN apt-get update && apt-get install -y gcc curl && rm -rf /var/lib/apt/lists/*
+
+ # Copy source code first
+ COPY ttsfm/ ./ttsfm/
+ COPY ttsfm-web/ ./ttsfm-web/
+ COPY pyproject.toml ./
+ COPY requirements.txt ./
+
+ # Install the TTSFM package with web dependencies
+ RUN pip install --no-cache-dir -e .[web]
+
+ # Install additional web dependencies (quoted so the shell does not treat >= as a redirect)
+ RUN pip install --no-cache-dir "python-dotenv>=1.0.0"
+
+ # Create non-root user
+ RUN useradd --create-home ttsfm && chown -R ttsfm:ttsfm /app
+ USER ttsfm
+
+ EXPOSE 8000
+
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:8000/api/health || exit 1
+
+ WORKDIR /app/ttsfm-web
+ CMD ["python", "-m", "waitress", "--host=0.0.0.0", "--port=8000", "app:app"]
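
The HEALTHCHECK above probes `/api/health` with curl inside the container; the same check can be run from the host once the port is published. A minimal sketch (assuming `docker run -p 8000:8000 ...`):

```python
# Minimal sketch: poll the published health endpoint from the host machine.
import requests

resp = requests.get("http://localhost:8000/api/health", timeout=10)
resp.raise_for_status()
print(resp.json())  # expected shape: {"status": "healthy", "timestamp": "..."}
```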
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 dbcccc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pyproject.toml ADDED
@@ -0,0 +1,161 @@
+ [build-system]
+ requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "ttsfm"
+ version = "3.1.0"
+ description = "Text-to-Speech API Client with OpenAI compatibility"
+ readme = "README.md"
+ license = "MIT"
+ authors = [
+     {name = "dbcccc", email = "[email protected]"}
+ ]
+ maintainers = [
+     {name = "dbcccc", email = "[email protected]"}
+ ]
+ classifiers = [
+     "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
+
+     "Operating System :: OS Independent",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.8",
+     "Programming Language :: Python :: 3.9",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Topic :: Multimedia :: Sound/Audio :: Speech",
+     "Topic :: Software Development :: Libraries :: Python Modules",
+     "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
+ ]
+ keywords = [
+     "tts",
+     "text-to-speech",
+     "speech-synthesis",
+     "openai",
+     "api-client",
+     "audio",
+     "voice",
+     "speech"
+ ]
+ requires-python = ">=3.8"
+ dependencies = [
+     "requests>=2.25.0",
+     "aiohttp>=3.8.0",
+     "fake-useragent>=1.4.0",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=6.0",
+     "pytest-asyncio>=0.18.0",
+     "pytest-cov>=2.0",
+     "black>=22.0",
+     "isort>=5.0",
+     "flake8>=4.0",
+     "mypy>=0.900",
+     "pre-commit>=2.0",
+ ]
+ docs = [
+     "sphinx>=4.0",
+     "sphinx-rtd-theme>=1.0",
+     "myst-parser>=0.17",
+ ]
+ web = [
+     "flask>=2.0.0",
+     "flask-cors>=3.0.10",
+     "waitress>=3.0.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/dbccccccc/ttsfm"
+ Documentation = "https://github.com/dbccccccc/ttsfm/blob/main/docs/"
+ Repository = "https://github.com/dbccccccc/ttsfm"
+ "Bug Tracker" = "https://github.com/dbccccccc/ttsfm/issues"
+
+ [project.scripts]
+ ttsfm = "ttsfm.cli:main"
+
+ [tool.setuptools]
+ packages = ["ttsfm"]
+
+ [tool.setuptools.package-data]
+ ttsfm = ["py.typed"]
+
+ [tool.black]
+ line-length = 100
+ target-version = ['py38']
+ include = '\.pyi?$'
+ extend-exclude = '''
+ /(
+   # directories
+   \.eggs
+   | \.git
+   | \.hg
+   | \.mypy_cache
+   | \.tox
+   | \.venv
+   | build
+   | dist
+ )/
+ '''
+
+ [tool.isort]
+ profile = "black"
+ line_length = 100
+ multi_line_output = 3
+ include_trailing_comma = true
+ force_grid_wrap = 0
+ use_parentheses = true
+ ensure_newline_before_comments = true
+
+ [tool.mypy]
+ python_version = "3.8"
+ warn_return_any = true
+ warn_unused_configs = true
+ disallow_untyped_defs = true
+ disallow_incomplete_defs = true
+ check_untyped_defs = true
+ disallow_untyped_decorators = true
+ no_implicit_optional = true
+ warn_redundant_casts = true
+ warn_unused_ignores = true
+ warn_no_return = true
+ warn_unreachable = true
+ strict_equality = true
+
+ [tool.pytest.ini_options]
+ minversion = "6.0"
+ addopts = "-ra -q --strict-markers --strict-config"
+ testpaths = ["tests"]
+ python_files = ["test_*.py", "*_test.py"]
+ python_classes = ["Test*"]
+ python_functions = ["test_*"]
+ markers = [
+     "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+     "integration: marks tests as integration tests",
+     "unit: marks tests as unit tests",
+ ]
+
+ [tool.coverage.run]
+ source = ["ttsfm"]
+ omit = [
+     "*/tests/*",
+     "*/test_*",
+     "setup.py",
+ ]
+
+ [tool.coverage.report]
+ exclude_lines = [
+     "pragma: no cover",
+     "def __repr__",
+     "if self.debug:",
+     "if settings.DEBUG",
+     "raise AssertionError",
+     "raise NotImplementedError",
+     "if 0:",
+     "if __name__ == .__main__.:",
+     "class .*\\bProtocol\\):",
+     "@(abc\\.)?abstractmethod",
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ # Core dependencies for the TTSFM package
+ requests>=2.25.0
+ aiohttp>=3.8.0
+ fake-useragent>=1.4.0
ttsfm-web/app.py ADDED
@@ -0,0 +1,574 @@
+ """
+ TTSFM Web Application
+
+ A Flask web application that provides a user-friendly interface
+ for the TTSFM text-to-speech package.
+ """
+
+ import os
+ import json
+ import logging
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Dict, Any, Optional
+
+ from flask import Flask, request, jsonify, send_file, Response, render_template
+ from flask_cors import CORS
+ from dotenv import load_dotenv
+
+ # Import the TTSFM package
+ try:
+     from ttsfm import TTSClient, Voice, AudioFormat, TTSException
+     from ttsfm.exceptions import APIException, NetworkException, ValidationException
+     from ttsfm.utils import validate_text_length, split_text_by_length
+ except ImportError:
+     # Fallback for development when package is not installed
+     import sys
+     sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+     from ttsfm import TTSClient, Voice, AudioFormat, TTSException
+     from ttsfm.exceptions import APIException, NetworkException, ValidationException
+     from ttsfm.utils import validate_text_length, split_text_by_length
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ # Create Flask app
+ app = Flask(__name__, static_folder='static', static_url_path='/static')
+ CORS(app)
+
+ # Configuration
+ HOST = os.getenv("HOST", "localhost")
+ PORT = int(os.getenv("PORT", "8000"))
+ DEBUG = os.getenv("DEBUG", "false").lower() == "true"
+
+ # Create TTS client - now uses openai.fm directly, no configuration needed
+ tts_client = TTSClient()
+
+ logger.info("Initialized web app with TTSFM using openai.fm free service")
+
+ @app.route('/')
+ def index():
+     """Serve the main web interface."""
+     return render_template('index.html')
+
+ @app.route('/playground')
+ def playground():
+     """Serve the interactive playground."""
+     return render_template('playground.html')
+
+ @app.route('/docs')
+ def docs():
+     """Serve the API documentation."""
+     return render_template('docs.html')
+
+ @app.route('/api/voices', methods=['GET'])
+ def get_voices():
+     """Get list of available voices."""
+     try:
+         voices = [
+             {
+                 "id": voice.value,
+                 "name": voice.value.title(),
+                 "description": f"{voice.value.title()} voice"
+             }
+             for voice in Voice
+         ]
+
+         return jsonify({
+             "voices": voices,
+             "count": len(voices)
+         })
+
+     except Exception as e:
+         logger.error(f"Error getting voices: {e}")
+         return jsonify({"error": "Failed to get voices"}), 500
+
+ @app.route('/api/formats', methods=['GET'])
+ def get_formats():
+     """Get list of supported audio formats."""
+     try:
+         formats = [
+             {
+                 "id": "mp3",
+                 "name": "MP3",
+                 "mime_type": "audio/mpeg",
+                 "description": "MP3 audio format - good quality, small file size",
+                 "quality": "Good",
+                 "file_size": "Small",
+                 "use_case": "Web, mobile apps, general use"
+             },
+             {
+                 "id": "opus",
+                 "name": "OPUS",
+                 "mime_type": "audio/opus",
+                 "description": "OPUS audio format - excellent quality, small file size",
+                 "quality": "Excellent",
+                 "file_size": "Small",
+                 "use_case": "Web streaming, VoIP"
+             },
+             {
+                 "id": "aac",
+                 "name": "AAC",
+                 "mime_type": "audio/aac",
+                 "description": "AAC audio format - good quality, medium file size",
+                 "quality": "Good",
+                 "file_size": "Medium",
+                 "use_case": "Apple devices, streaming"
+             },
+             {
+                 "id": "flac",
+                 "name": "FLAC",
+                 "mime_type": "audio/flac",
+                 "description": "FLAC audio format - lossless quality, large file size",
+                 "quality": "Lossless",
+                 "file_size": "Large",
+                 "use_case": "High-quality archival"
+             },
+             {
+                 "id": "wav",
+                 "name": "WAV",
+                 "mime_type": "audio/wav",
+                 "description": "WAV audio format - lossless quality, large file size",
+                 "quality": "Lossless",
+                 "file_size": "Large",
+                 "use_case": "Professional audio"
+             },
+             {
+                 "id": "pcm",
+                 "name": "PCM",
+                 "mime_type": "audio/pcm",
+                 "description": "PCM audio format - raw audio data, large file size",
+                 "quality": "Raw",
+                 "file_size": "Large",
+                 "use_case": "Audio processing"
+             }
+         ]
+
+         return jsonify({
+             "formats": formats,
+             "count": len(formats)
+         })
+
+     except Exception as e:
+         logger.error(f"Error getting formats: {e}")
+         return jsonify({"error": "Failed to get formats"}), 500
+
+ @app.route('/api/validate-text', methods=['POST'])
+ def validate_text():
+     """Validate text length and provide splitting suggestions."""
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data provided"}), 400
+
+         text = data.get('text', '').strip()
+         max_length = data.get('max_length', 4096)
+
+         if not text:
+             return jsonify({"error": "Text is required"}), 400
+
+         text_length = len(text)
+         is_valid = text_length <= max_length
+
+         result = {
+             "text_length": text_length,
+             "max_length": max_length,
+             "is_valid": is_valid,
+             "needs_splitting": not is_valid
+         }
+
+         if not is_valid:
+             # Provide splitting suggestions
+             chunks = split_text_by_length(text, max_length, preserve_words=True)
+             result.update({
+                 "suggested_chunks": len(chunks),
+                 "chunk_preview": [chunk[:100] + "..." if len(chunk) > 100 else chunk for chunk in chunks[:3]]
+             })
+
+         return jsonify(result)
+
+     except Exception as e:
+         logger.error(f"Text validation error: {e}")
+         return jsonify({"error": "Text validation failed"}), 500
+
+ @app.route('/api/generate', methods=['POST'])
+ def generate_speech():
+     """Generate speech from text using the TTSFM package."""
+     try:
+         # Parse request data
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data provided"}), 400
+
+         # Extract parameters
+         text = data.get('text', '').strip()
+         voice = data.get('voice', Voice.ALLOY.value)
+         response_format = data.get('format', AudioFormat.MP3.value)
+         instructions = data.get('instructions', '').strip() or None
+         max_length = data.get('max_length', 4096)
+         validate_length = data.get('validate_length', True)
+
+         # Validate required fields
+         if not text:
+             return jsonify({"error": "Text is required"}), 400
+
+         # Validate voice
+         try:
+             voice_enum = Voice(voice.lower())
+         except ValueError:
+             return jsonify({
+                 "error": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}"
+             }), 400
+
+         # Validate format
+         try:
+             format_enum = AudioFormat(response_format.lower())
+         except ValueError:
+             return jsonify({
+                 "error": f"Invalid format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}"
+             }), 400
+
+         logger.info(f"Generating speech: text='{text[:50]}...', voice={voice}, format={response_format}")
+
+         # Generate speech using the TTSFM package with validation
+         response = tts_client.generate_speech(
+             text=text,
+             voice=voice_enum,
+             response_format=format_enum,
+             instructions=instructions,
+             max_length=max_length,
+             validate_length=validate_length
+         )
+
+         # Return audio data
+         return Response(
+             response.audio_data,
+             mimetype=response.content_type,
+             headers={
+                 'Content-Disposition': f'attachment; filename="speech.{response.format.value}"',
+                 'Content-Length': str(response.size),
+                 'X-Audio-Format': response.format.value,
+                 'X-Audio-Size': str(response.size)
+             }
+         )
+
+     except ValidationException as e:
+         logger.warning(f"Validation error: {e}")
+         return jsonify({"error": str(e)}), 400
+
+     except APIException as e:
+         logger.error(f"API error: {e}")
+         return jsonify({
+             "error": str(e),
+             "status_code": getattr(e, 'status_code', 500)
+         }), getattr(e, 'status_code', 500)
+
+     except NetworkException as e:
+         logger.error(f"Network error: {e}")
+         return jsonify({
+             "error": "TTS service is currently unavailable",
+             "details": str(e)
+         }), 503
+
+     except TTSException as e:
+         logger.error(f"TTS error: {e}")
+         return jsonify({"error": str(e)}), 500
+
+     except Exception as e:
+         logger.error(f"Unexpected error: {e}")
+         return jsonify({"error": "Internal server error"}), 500
+
+ @app.route('/api/generate-batch', methods=['POST'])
+ def generate_speech_batch():
+     """Generate speech from long text by splitting into chunks."""
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data provided"}), 400
+
+         text = data.get('text', '').strip()
+         voice = data.get('voice', Voice.ALLOY.value)
+         response_format = data.get('format', AudioFormat.MP3.value)
+         instructions = data.get('instructions', '').strip() or None
+         max_length = data.get('max_length', 4096)
+         preserve_words = data.get('preserve_words', True)
+
+         if not text:
+             return jsonify({"error": "Text is required"}), 400
+
+         # Validate voice and format
+         try:
+             voice_enum = Voice(voice.lower())
+             format_enum = AudioFormat(response_format.lower())
+         except ValueError as e:
+             return jsonify({"error": f"Invalid voice or format: {e}"}), 400
+
+         # Split text into chunks
+         chunks = split_text_by_length(text, max_length, preserve_words)
+
+         if not chunks:
+             return jsonify({"error": "No valid text chunks found"}), 400
+
+         logger.info(f"Processing {len(chunks)} chunks for batch generation")
+
+         # Generate speech for each chunk
+         results = []
+         for i, chunk in enumerate(chunks):
+             try:
+                 response = tts_client.generate_speech(
+                     text=chunk,
+                     voice=voice_enum,
+                     response_format=format_enum,
+                     instructions=instructions,
+                     max_length=max_length,
+                     validate_length=False  # Already split
+                 )
+
+                 # Convert to base64 for JSON response
+                 import base64
+                 audio_b64 = base64.b64encode(response.audio_data).decode('utf-8')
+
+                 results.append({
+                     "chunk_index": i + 1,
+                     "chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
+                     "audio_data": audio_b64,
+                     "content_type": response.content_type,
+                     "size": response.size,
+                     "format": response.format.value
+                 })
+
+             except Exception as e:
+                 logger.error(f"Failed to generate chunk {i+1}: {e}")
+                 results.append({
+                     "chunk_index": i + 1,
+                     "chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
+                     "error": str(e)
+                 })
+
+         return jsonify({
+             "total_chunks": len(chunks),
+             "successful_chunks": len([r for r in results if "audio_data" in r]),
+             "results": results
+         })
+
+     except Exception as e:
+         logger.error(f"Batch generation error: {e}")
+         return jsonify({"error": "Batch generation failed"}), 500
+
+ @app.route('/api/status', methods=['GET'])
+ def get_status():
+     """Get service status."""
+     try:
+         # Try to make a simple request to check if the TTS service is available
+         test_response = tts_client.generate_speech(
+             text="test",
+             voice=Voice.ALLOY,
+             response_format=AudioFormat.MP3
+         )
+
+         return jsonify({
+             "status": "online",
+             "tts_service": "openai.fm (free)",
+             "package_version": "3.0.0",
+             "timestamp": datetime.now().isoformat()
+         })
+
+     except Exception as e:
+         logger.error(f"Status check failed: {e}")
+         return jsonify({
+             "status": "error",
+             "tts_service": "openai.fm (free)",
+             "error": str(e),
+             "timestamp": datetime.now().isoformat()
+         }), 503
+
+ @app.route('/api/health', methods=['GET'])
+ def health_check():
+     """Simple health check endpoint."""
+     return jsonify({
+         "status": "healthy",
+         "timestamp": datetime.now().isoformat()
+     })
+
+ # OpenAI-compatible API endpoints
+ @app.route('/v1/audio/speech', methods=['POST'])
+ def openai_speech():
+     """OpenAI-compatible speech generation endpoint."""
+     try:
+         # Parse request data
+         data = request.get_json()
+         if not data:
+             return jsonify({
+                 "error": {
+                     "message": "No JSON data provided",
+                     "type": "invalid_request_error",
+                     "code": "missing_data"
+                 }
+             }), 400
+
+         # Extract OpenAI-compatible parameters
+         model = data.get('model', 'gpt-4o-mini-tts')  # Accept but ignore model
+         input_text = data.get('input', '').strip()
+         voice = data.get('voice', 'alloy')
+         response_format = data.get('response_format', 'mp3')
+         instructions = data.get('instructions', '').strip() or None
+         speed = data.get('speed', 1.0)  # Accept but ignore speed
+
+         # Validate required fields
+         if not input_text:
+             return jsonify({
+                 "error": {
+                     "message": "Input text is required",
+                     "type": "invalid_request_error",
+                     "code": "missing_input"
+                 }
+             }), 400
+
+         # Validate voice
+         try:
+             voice_enum = Voice(voice.lower())
+         except ValueError:
+             return jsonify({
+                 "error": {
+                     "message": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}",
+                     "type": "invalid_request_error",
+                     "code": "invalid_voice"
+                 }
+             }), 400
+
+         # Validate format
+         try:
+             format_enum = AudioFormat(response_format.lower())
+         except ValueError:
+             return jsonify({
+                 "error": {
+                     "message": f"Invalid response_format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}",
+                     "type": "invalid_request_error",
+                     "code": "invalid_format"
+                 }
+             }), 400
+
+         logger.info(f"OpenAI API: Generating speech: text='{input_text[:50]}...', voice={voice}, format={response_format}")
+
+         # Generate speech using the TTSFM package
+         response = tts_client.generate_speech(
+             text=input_text,
+             voice=voice_enum,
+             response_format=format_enum,
+             instructions=instructions,
+             max_length=4096,
+             validate_length=True
+         )
+
+         # Return audio data in OpenAI format
+         return Response(
+             response.audio_data,
+             mimetype=response.content_type,
+             headers={
+                 'Content-Type': response.content_type,
+                 'Content-Length': str(response.size),
+                 'X-Audio-Format': response.format.value,
+                 'X-Audio-Size': str(response.size),
+                 'X-Powered-By': 'TTSFM-OpenAI-Compatible'
+             }
+         )
+
+     except ValidationException as e:
+         logger.warning(f"OpenAI API validation error: {e}")
+         return jsonify({
+             "error": {
+                 "message": str(e),
+                 "type": "invalid_request_error",
+                 "code": "validation_error"
+             }
+         }), 400
+
+     except APIException as e:
+         logger.error(f"OpenAI API error: {e}")
+         return jsonify({
+             "error": {
+                 "message": str(e),
+                 "type": "api_error",
+                 "code": "tts_error"
+             }
+         }), getattr(e, 'status_code', 500)
+
+     except NetworkException as e:
+         logger.error(f"OpenAI API network error: {e}")
+         return jsonify({
+             "error": {
+                 "message": "TTS service is currently unavailable",
+                 "type": "service_unavailable_error",
+                 "code": "service_unavailable"
+             }
+         }), 503
+
+     except Exception as e:
+         logger.error(f"OpenAI API unexpected error: {e}")
+         return jsonify({
+             "error": {
+                 "message": "An unexpected error occurred",
+                 "type": "internal_error",
+                 "code": "internal_error"
+             }
+         }), 500
+
+ @app.route('/v1/models', methods=['GET'])
+ def openai_models():
+     """OpenAI-compatible models endpoint."""
+     return jsonify({
+         "object": "list",
+         "data": [
+             {
+                 "id": "gpt-4o-mini-tts",
+                 "object": "model",
+                 "created": 1699564800,
+                 "owned_by": "ttsfm",
+                 "permission": [],
+                 "root": "gpt-4o-mini-tts",
+                 "parent": None
+             }
+         ]
+     })
+
+ @app.errorhandler(404)
+ def not_found(error):
+     """Handle 404 errors."""
+     return jsonify({"error": "Endpoint not found"}), 404
+
+ @app.errorhandler(405)
+ def method_not_allowed(error):
+     """Handle 405 errors."""
+     return jsonify({"error": "Method not allowed"}), 405
+
+ @app.errorhandler(500)
+ def internal_error(error):
+     """Handle 500 errors."""
+     logger.error(f"Internal server error: {error}")
+     return jsonify({"error": "Internal server error"}), 500
+
+ if __name__ == '__main__':
+     logger.info(f"Starting TTSFM web application on {HOST}:{PORT}")
+     logger.info("Using openai.fm free TTS service")
+     logger.info(f"Debug mode: {DEBUG}")
+
+     try:
+         app.run(
+             host=HOST,
+             port=PORT,
+             debug=DEBUG
+         )
+     except KeyboardInterrupt:
+         logger.info("Application stopped by user")
+     except Exception as e:
+         logger.error(f"Failed to start application: {e}")
+     finally:
+         # Clean up TTS client
+         tts_client.close()
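
For reference, the OpenAI-compatible `/v1/audio/speech` route defined above can be exercised with any HTTP client. A minimal sketch using requests (assuming the app is running locally on port 8000):

```python
# Minimal sketch: call the OpenAI-compatible endpoint exposed by app.py.
import requests

resp = requests.post(
    "http://localhost:8000/v1/audio/speech",
    json={
        "model": "gpt-4o-mini-tts",   # accepted but ignored by the server
        "input": "Hello from TTSFM!",
        "voice": "alloy",
        "response_format": "mp3",
    },
    timeout=60,
)
resp.raise_for_status()

with open("speech.mp3", "wb") as f:
    f.write(resp.content)  # raw audio bytes returned by the server
```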
ttsfm-web/requirements.txt ADDED
@@ -0,0 +1,9 @@
+ # Web application dependencies
+ flask>=2.0.0
+ flask-cors>=3.0.10
+ waitress>=3.0.0
+ python-dotenv>=1.0.0
+
+ # TTSFM package (install from local directory or PyPI)
+ # For local development: pip install -e ../
+ # For Docker/production: installed via pyproject.toml[web] dependencies
ttsfm-web/static/css/style.css ADDED
@@ -0,0 +1,1390 @@
1
+ /* TTSFM Web Application Custom Styles */
2
+
3
+ :root {
4
+ /* Clean Color Palette */
5
+ --primary-color: #2563eb;
6
+ --primary-dark: #1d4ed8;
7
+ --primary-light: #3b82f6;
8
+ --secondary-color: #64748b;
9
+ --secondary-dark: #475569;
10
+ --accent-color: #10b981;
11
+ --accent-dark: #059669;
12
+
13
+ /* Status Colors */
14
+ --success-color: #10b981;
15
+ --warning-color: #f59e0b;
16
+ --danger-color: #ef4444;
17
+ --info-color: #3b82f6;
18
+
19
+ /* Clean Neutral Colors */
20
+ --light-color: #ffffff;
21
+ --light-gray: #f8fafc;
22
+ --medium-gray: #64748b;
23
+ --dark-color: #1e293b;
24
+ --text-color: #374151;
25
+ --text-muted: #6b7280;
26
+
27
+ /* Design System */
28
+ --border-radius: 0.75rem;
29
+ --border-radius-sm: 0.5rem;
30
+ --border-radius-lg: 1rem;
31
+ --box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
32
+ --box-shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);
33
+ --box-shadow-xl: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
34
+ --transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
35
+ --transition-fast: all 0.15s cubic-bezier(0.4, 0, 0.2, 1);
36
+
37
+ /* Gradients */
38
+ --gradient-primary: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-light) 100%);
39
+ --gradient-secondary: linear-gradient(135deg, var(--secondary-color) 0%, var(--secondary-dark) 100%);
40
+ --gradient-accent: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-dark) 100%);
41
+ --gradient-hero: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 50%, var(--accent-color) 100%);
42
+ }
43
+
44
+ /* Global Styles */
45
+ body {
46
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
47
+ line-height: 1.6;
48
+ color: var(--text-color);
49
+ background-color: #ffffff;
50
+ font-weight: 400;
51
+ -webkit-font-smoothing: antialiased;
52
+ -moz-osx-font-smoothing: grayscale;
53
+ }
54
+
55
+ /* Enhanced Typography */
56
+ h1, h2, h3, h4, h5, h6 {
57
+ font-weight: 700;
58
+ line-height: 1.3;
59
+ color: var(--dark-color);
60
+ letter-spacing: -0.025em;
61
+ }
62
+
63
+ .display-1, .display-2, .display-3, .display-4 {
64
+ font-weight: 800;
65
+ letter-spacing: -0.05em;
66
+ }
67
+
68
+ .lead {
69
+ font-size: 1.125rem;
70
+ font-weight: 400;
71
+ color: var(--text-muted);
72
+ line-height: 1.8;
73
+ }
74
+
75
+ /* Simplified Button Styles */
76
+ .btn {
77
+ font-weight: 600;
78
+ border-radius: var(--border-radius-sm);
79
+ transition: all 0.2s ease;
80
+ letter-spacing: 0.025em;
81
+ }
82
+
83
+ .btn-primary {
84
+ background-color: var(--primary-color);
85
+ border-color: var(--primary-color);
86
+ color: white;
87
+ }
88
+
89
+ .btn-primary:hover {
90
+ background-color: var(--primary-dark);
91
+ border-color: var(--primary-dark);
92
+ color: white;
93
+ }
94
+
95
+ .btn-outline-primary {
96
+ border: 2px solid var(--primary-color);
97
+ color: var(--primary-color);
98
+ background: transparent;
99
+ }
100
+
101
+ .btn-outline-primary:hover {
102
+ background: var(--primary-color);
103
+ border-color: var(--primary-color);
104
+ color: white;
105
+ }
106
+
107
+ .btn-lg {
108
+ padding: 0.875rem 2rem;
109
+ font-size: 1.125rem;
110
+ border-radius: var(--border-radius);
111
+ }
112
+
113
+ .btn-sm {
114
+ padding: 0.5rem 1rem;
115
+ font-size: 0.875rem;
116
+ border-radius: var(--border-radius-sm);
117
+ }
118
+
119
+ /* Clean Card Styles */
120
+ .card {
121
+ border: 1px solid #e5e7eb;
122
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
123
+ transition: all 0.2s ease;
124
+ border-radius: 12px;
125
+ background: white;
126
+ }
127
+
128
+ .card:hover {
129
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.07);
130
+ border-color: #d1d5db;
131
+ }
132
+
133
+ .card-body {
134
+ padding: 2rem;
135
+ }
136
+
137
+ /* Clean Hero Section */
138
+ .hero-section {
139
+ background: linear-gradient(135deg, #f8fafc 0%, #ffffff 100%);
140
+ color: var(--text-color);
141
+ padding: 6rem 0;
142
+ min-height: 80vh;
143
+ display: flex;
144
+ align-items: center;
145
+ border-bottom: 1px solid #e5e7eb;
146
+ }
147
+
148
+ .min-vh-75 {
149
+ min-height: 75vh;
150
+ }
151
+
152
+ /* Status Indicators */
153
+ .status-indicator {
154
+ display: inline-block;
155
+ width: 8px;
156
+ height: 8px;
157
+ border-radius: 50%;
158
+ background-color: #6c757d;
159
+ }
160
+
161
+ .status-online {
162
+ background-color: #28a745;
163
+ }
164
+
165
+ .status-offline {
166
+ background-color: #dc3545;
167
+ }
168
+
169
+ /* Footer */
170
+ .footer {
171
+ margin-top: auto;
172
+ }
173
+
174
+ /* Clean Code Blocks */
175
+ pre {
176
+ background-color: #f8fafc !important;
177
+ border: 1px solid #e5e7eb;
178
+ border-radius: 8px;
179
+ font-size: 0.875rem;
180
+ }
181
+
182
+ code {
183
+ color: #374151;
184
+ font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
185
+ }
186
+
187
+ /* Enhanced Form Styles */
188
+ .form-control, .form-select {
189
+ border-radius: var(--border-radius-sm);
190
+ border: 2px solid #e2e8f0;
191
+ transition: var(--transition);
192
+ padding: 0.875rem 1rem;
193
+ font-size: 1rem;
194
+ background-color: #ffffff;
195
+ color: var(--text-color);
196
+ }
197
+
198
+ .form-control:focus, .form-select:focus {
199
+ border-color: var(--primary-color);
200
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
201
+ outline: none;
202
+ background-color: #ffffff;
203
+ }
204
+
205
+ .form-control:hover, .form-select:hover {
206
+ border-color: #cbd5e1;
207
+ }
208
+
209
+ .form-label {
210
+ font-weight: 600;
211
+ color: var(--dark-color);
212
+ margin-bottom: 0.75rem;
213
+ font-size: 0.95rem;
214
+ }
215
+
216
+ .form-text {
217
+ color: var(--text-muted);
218
+ font-size: 0.875rem;
219
+ margin-top: 0.5rem;
220
+ }
221
+
222
+ .form-check-input {
223
+ border-radius: var(--border-radius-sm);
224
+ border: 2px solid #e2e8f0;
225
+ width: 1.25rem;
226
+ height: 1.25rem;
227
+ }
228
+
229
+ .form-check-input:checked {
230
+ background-color: var(--primary-color);
231
+ border-color: var(--primary-color);
232
+ }
233
+
234
+ .form-check-input:focus {
235
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
236
+ }
237
+
238
+ .form-check-label {
239
+ color: var(--text-color);
240
+ font-weight: 500;
241
+ margin-left: 0.5rem;
242
+ }
243
+
244
+ /* Enhanced Status Indicators */
245
+ .status-indicator {
246
+ display: inline-block;
247
+ width: 12px;
248
+ height: 12px;
249
+ border-radius: 50%;
250
+ margin-right: 8px;
251
+ position: relative;
252
+ animation: statusPulse 2s infinite;
253
+ }
254
+
255
+ .status-indicator::before {
256
+ content: '';
257
+ position: absolute;
258
+ top: -2px;
259
+ left: -2px;
260
+ right: -2px;
261
+ bottom: -2px;
262
+ border-radius: 50%;
263
+ opacity: 0.3;
264
+ animation: statusRing 2s infinite;
265
+ }
266
+
267
+ .status-online {
268
+ background-color: var(--success-color);
269
+ box-shadow: 0 0 8px rgba(16, 185, 129, 0.4);
270
+ }
271
+
272
+ .status-online::before {
273
+ background-color: var(--success-color);
274
+ }
275
+
276
+ .status-offline {
277
+ background-color: var(--danger-color);
278
+ box-shadow: 0 0 8px rgba(239, 68, 68, 0.4);
279
+ }
280
+
281
+ .status-offline::before {
282
+ background-color: var(--danger-color);
283
+ }
284
+
285
+ @keyframes statusPulse {
286
+ 0%, 100% { opacity: 1; }
287
+ 50% { opacity: 0.7; }
288
+ }
289
+
290
+ @keyframes statusRing {
291
+ 0% { transform: scale(0.8); opacity: 0.8; }
292
+ 100% { transform: scale(1.4); opacity: 0; }
293
+ }
294
+
295
+ /* Enhanced Audio Player */
296
+ .audio-player {
297
+ width: 100%;
298
+ margin-top: 1rem;
299
+ border-radius: var(--border-radius);
300
+ box-shadow: var(--box-shadow);
301
+ background: var(--light-color);
302
+ padding: 0.5rem;
303
+ }
304
+
305
+ .audio-player::-webkit-media-controls-panel {
306
+ background-color: var(--light-color);
307
+ border-radius: var(--border-radius-sm);
308
+ }
309
+
310
+ /* Enhanced Sections */
311
+ .features-section {
312
+ padding: 6rem 0;
313
+ background: linear-gradient(180deg, #ffffff 0%, var(--light-color) 100%);
314
+ }
315
+
316
+ .stats-section {
317
+ padding: 4rem 0;
318
+ background: var(--gradient-primary);
319
+ color: white;
320
+ position: relative;
321
+ overflow: hidden;
322
+ }
323
+
324
+ .stats-section::before {
325
+ content: '';
326
+ position: absolute;
327
+ top: 0;
328
+ left: 0;
329
+ right: 0;
330
+ bottom: 0;
331
+ background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="stats-pattern" width="40" height="40" patternUnits="userSpaceOnUse"><circle cx="20" cy="20" r="1" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23stats-pattern)"/></svg>');
332
+ }
333
+
334
+ .stat-card {
335
+ text-align: center;
336
+ padding: 2rem 1rem;
337
+ background: rgba(255, 255, 255, 0.1);
338
+ border-radius: var(--border-radius);
339
+ backdrop-filter: blur(10px);
340
+ border: 1px solid rgba(255, 255, 255, 0.2);
341
+ transition: var(--transition);
342
+ }
343
+
344
+ .stat-card:hover {
345
+ transform: translateY(-5px);
346
+ background: rgba(255, 255, 255, 0.15);
347
+ }
348
+
349
+ .stat-icon {
350
+ font-size: 2.5rem;
351
+ margin-bottom: 1rem;
352
+ color: rgba(255, 255, 255, 0.9);
353
+ }
354
+
355
+ .stat-number {
356
+ font-size: 3rem;
357
+ font-weight: 800;
358
+ color: white;
359
+ margin-bottom: 0.5rem;
360
+ display: block;
361
+ }
362
+
363
+ .stat-label {
364
+ color: rgba(255, 255, 255, 0.9);
365
+ font-weight: 500;
366
+ font-size: 0.95rem;
367
+ }
368
+
369
+ .quick-start-section {
370
+ padding: 6rem 0;
371
+ }
372
+
373
+ .use-cases-section {
374
+ padding: 6rem 0;
375
+ background: var(--light-color);
376
+ }
377
+
378
+ .tech-specs-section {
379
+ padding: 6rem 0;
380
+ }
381
+
382
+ .faq-section {
383
+ padding: 6rem 0;
384
+ background: var(--light-color);
385
+ }
386
+
387
+ .final-cta-section {
388
+ padding: 6rem 0;
389
+ background: var(--gradient-hero);
390
+ color: white;
391
+ position: relative;
392
+ overflow: hidden;
393
+ }
394
+
395
+ .cta-background-animation {
396
+ position: absolute;
397
+ top: 0;
398
+ left: 0;
399
+ right: 0;
400
+ bottom: 0;
401
+ background: linear-gradient(45deg, transparent 30%, rgba(255,255,255,0.05) 50%, transparent 70%);
402
+ animation: shimmer 4s ease-in-out infinite;
403
+ }
404
+
405
+ .section-badge {
406
+ display: inline-block;
407
+ background: var(--gradient-primary);
408
+ color: white;
409
+ padding: 0.5rem 1.5rem;
410
+ border-radius: 2rem;
411
+ font-size: 0.875rem;
412
+ font-weight: 600;
413
+ margin-bottom: 1.5rem;
414
+ box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
415
+ }
416
+
417
+ /* Enhanced Loading States */
418
+ .loading-spinner {
419
+ display: none;
420
+ }
421
+
422
+ .loading .loading-spinner {
423
+ display: inline-block;
424
+ }
425
+
426
+ .loading .btn-text {
427
+ display: none;
428
+ }
429
+
430
+ .loading {
431
+ position: relative;
432
+ overflow: hidden;
433
+ }
434
+
435
+ .loading::after {
436
+ content: '';
437
+ position: absolute;
438
+ top: 0;
439
+ left: -100%;
440
+ width: 100%;
441
+ height: 100%;
442
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
443
+ animation: loading-shimmer 1.5s infinite;
444
+ }
445
+
446
+ @keyframes loading-shimmer {
447
+ 0% { left: -100%; }
448
+ 100% { left: 100%; }
449
+ }
450
+
451
+ /* Enhanced Code Blocks */
452
+ .code-card {
453
+ background: white;
454
+ border-radius: var(--border-radius);
455
+ box-shadow: var(--box-shadow);
456
+ overflow: hidden;
457
+ border: 1px solid #e2e8f0;
458
+ transition: var(--transition);
459
+ }
460
+
461
+ .code-card:hover {
462
+ transform: translateY(-2px);
463
+ box-shadow: var(--box-shadow-lg);
464
+ }
465
+
466
+ .code-header {
467
+ background: var(--light-gray);
468
+ padding: 1rem 1.5rem;
469
+ border-bottom: 1px solid #e2e8f0;
470
+ display: flex;
471
+ justify-content: space-between;
472
+ align-items: center;
473
+ }
474
+
475
+ .code-header h4 {
476
+ margin: 0;
477
+ font-size: 1.1rem;
478
+ color: var(--dark-color);
479
+ }
480
+
481
+ .code-content {
482
+ padding: 1.5rem;
483
+ background: #f8fafc;
484
+ margin: 0;
485
+ overflow-x: auto;
486
+ }
487
+
488
+ .code-content code {
489
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
490
+ font-size: 0.9rem;
491
+ line-height: 1.6;
492
+ color: var(--text-color);
493
+ }
494
+
495
+ .code-footer {
496
+ padding: 1rem 1.5rem;
497
+ background: white;
498
+ border-top: 1px solid #e2e8f0;
499
+ }
500
+
501
+ .copy-btn {
502
+ font-size: 0.8rem;
503
+ padding: 0.25rem 0.75rem;
504
+ }
505
+
506
+ /* Enhanced Use Case Cards */
507
+ .use-case-card {
508
+ background: white;
509
+ border-radius: var(--border-radius);
510
+ padding: 2rem;
511
+ box-shadow: var(--box-shadow);
512
+ transition: var(--transition);
513
+ border: 1px solid #e2e8f0;
514
+ height: 100%;
515
+ text-align: center;
516
+ }
517
+
518
+ .use-case-card:hover {
519
+ transform: translateY(-4px);
520
+ box-shadow: var(--box-shadow-lg);
521
+ border-color: rgba(99, 102, 241, 0.2);
522
+ }
523
+
524
+ .use-case-icon {
525
+ width: 4rem;
526
+ height: 4rem;
527
+ background: var(--gradient-primary);
528
+ border-radius: 50%;
529
+ display: flex;
530
+ align-items: center;
531
+ justify-content: center;
532
+ font-size: 1.5rem;
533
+ color: white;
534
+ margin: 0 auto 1.5rem;
535
+ box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
536
+ }
537
+
538
+ .use-case-title {
539
+ font-size: 1.25rem;
540
+ font-weight: 700;
541
+ color: var(--dark-color);
542
+ margin-bottom: 1rem;
543
+ }
544
+
545
+ .use-case-description {
546
+ color: var(--text-muted);
547
+ margin-bottom: 1.5rem;
548
+ line-height: 1.7;
549
+ }
550
+
551
+ .use-case-examples {
552
+ display: flex;
553
+ flex-wrap: wrap;
554
+ gap: 0.5rem;
555
+ justify-content: center;
556
+ }
557
+
558
+ .use-case-examples .badge {
559
+ font-size: 0.75rem;
560
+ padding: 0.4rem 0.8rem;
561
+ border-radius: 1rem;
562
+ background: var(--light-gray);
563
+ color: var(--text-color);
564
+ border: 1px solid #e2e8f0;
565
+ }
566
+
567
+ /* Enhanced Tech Spec Cards */
568
+ .tech-spec-card {
569
+ background: white;
570
+ border-radius: var(--border-radius);
571
+ padding: 2rem;
572
+ box-shadow: var(--box-shadow);
573
+ transition: var(--transition);
574
+ border: 1px solid #e2e8f0;
575
+ height: 100%;
576
+ }
577
+
578
+ .tech-spec-card:hover {
579
+ transform: translateY(-2px);
580
+ box-shadow: var(--box-shadow-lg);
581
+ }
582
+
583
+ .tech-spec-icon {
584
+ width: 3rem;
585
+ height: 3rem;
586
+ background: var(--gradient-accent);
587
+ border-radius: var(--border-radius-sm);
588
+ display: flex;
589
+ align-items: center;
590
+ justify-content: center;
591
+ font-size: 1.25rem;
592
+ color: white;
593
+ margin: 0 auto 1rem;
594
+ }
595
+
596
+ .tech-spec-card h4, .tech-spec-card h5 {
597
+ color: var(--dark-color);
598
+ margin-bottom: 1.5rem;
599
+ }
600
+
601
+ .tech-spec-card ul {
602
+ list-style: none;
603
+ padding: 0;
604
+ }
605
+
606
+ .tech-spec-card li {
607
+ padding: 0.5rem 0;
608
+ color: var(--text-color);
609
+ border-bottom: 1px solid #f1f5f9;
610
+ }
611
+
612
+ .tech-spec-card li:last-child {
613
+ border-bottom: none;
614
+ }
615
+
616
+ /* Enhanced Validation Styles */
617
+ .badge {
618
+ font-size: 0.75em;
619
+ padding: 0.4em 0.8em;
620
+ border-radius: 1rem;
621
+ font-weight: 600;
622
+ letter-spacing: 0.025em;
623
+ }
624
+
625
+ .validation-result {
626
+ animation: slideDown 0.3s ease;
627
+ }
628
+
629
+ @keyframes slideDown {
630
+ from {
631
+ opacity: 0;
632
+ transform: translateY(-10px);
633
+ }
634
+ to {
635
+ opacity: 1;
636
+ transform: translateY(0);
637
+ }
638
+ }
639
+
640
+ /* Enhanced Alert Styles */
641
+ .alert {
642
+ border-radius: var(--border-radius);
643
+ border: none;
644
+ box-shadow: var(--box-shadow);
645
+ padding: 1rem 1.5rem;
646
+ }
647
+
648
+ .alert-success {
649
+ background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(16, 185, 129, 0.05) 100%);
650
+ color: #065f46;
651
+ border-left: 4px solid var(--success-color);
652
+ }
653
+
654
+ .alert-warning {
655
+ background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(245, 158, 11, 0.05) 100%);
656
+ color: #92400e;
657
+ border-left: 4px solid var(--warning-color);
658
+ }
659
+
660
+ .alert-danger {
661
+ background: linear-gradient(135deg, rgba(239, 68, 68, 0.1) 0%, rgba(239, 68, 68, 0.05) 100%);
662
+ color: #991b1b;
663
+ border-left: 4px solid var(--danger-color);
664
+ }
665
+
666
+ .alert-info {
667
+ background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(59, 130, 246, 0.05) 100%);
668
+ color: #1e40af;
669
+ border-left: 4px solid var(--info-color);
670
+ }
671
+
672
+ /* Enhanced Accordion */
673
+ .accordion-item {
674
+ border: none;
675
+ margin-bottom: 1rem;
676
+ border-radius: var(--border-radius) !important;
677
+ box-shadow: var(--box-shadow);
678
+ overflow: hidden;
679
+ }
680
+
681
+ .accordion-button {
682
+ background: white;
683
+ border: none;
684
+ padding: 1.5rem;
685
+ font-weight: 600;
686
+ color: var(--dark-color);
687
+ border-radius: var(--border-radius) !important;
688
+ }
689
+
690
+ .accordion-button:not(.collapsed) {
691
+ background: var(--light-gray);
692
+ color: var(--primary-color);
693
+ box-shadow: none;
694
+ }
695
+
696
+ .accordion-button:focus {
697
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
698
+ border-color: transparent;
699
+ }
700
+
701
+ .accordion-body {
702
+ padding: 1.5rem;
703
+ background: white;
704
+ color: var(--text-color);
705
+ line-height: 1.7;
706
+ }
707
+
708
+ /* Enhanced CTA Buttons */
709
+ .cta-btn-primary, .cta-btn-secondary {
710
+ position: relative;
711
+ overflow: hidden;
712
+ backdrop-filter: blur(10px);
713
+ border-radius: var(--border-radius);
714
+ }
715
+
716
+ .cta-btn-primary small, .cta-btn-secondary small {
717
+ font-size: 0.75rem;
718
+ opacity: 0.9;
719
+ font-weight: 400;
720
+ }
721
+
722
+ .cta-content {
723
+ position: relative;
724
+ z-index: 2;
725
+ }
726
+
727
+ .cta-buttons {
728
+ margin: 2rem 0;
729
+ }
730
+
731
+ .cta-stats {
732
+ margin-top: 3rem;
733
+ }
734
+
735
+ .cta-stat h4 {
736
+ font-size: 2rem;
737
+ font-weight: 800;
738
+ margin-bottom: 0.25rem;
739
+ }
740
+
741
+ .cta-stat small {
742
+ font-size: 0.9rem;
743
+ opacity: 0.9;
744
+ }
745
+
746
+ /* Enhanced Quick Start */
747
+ .quick-start-cta {
748
+ background: white;
749
+ border-radius: var(--border-radius-lg);
750
+ padding: 3rem;
751
+ box-shadow: var(--box-shadow-lg);
752
+ text-align: center;
753
+ border: 1px solid #e2e8f0;
754
+ }
755
+
756
+ .quick-start-cta h4 {
757
+ color: var(--dark-color);
758
+ margin-bottom: 1.5rem;
759
+ }
760
+
761
+ /* Enhanced Batch Processing */
762
+ .batch-chunk-card {
763
+ transition: var(--transition);
764
+ border: 1px solid #e2e8f0;
765
+ border-radius: var(--border-radius);
766
+ overflow: hidden;
767
+ }
768
+
769
+ .batch-chunk-card:hover {
770
+ transform: translateY(-2px);
771
+ box-shadow: var(--box-shadow-lg);
772
+ border-color: rgba(99, 102, 241, 0.2);
773
+ }
774
+
775
+ .batch-chunk-card .card-body {
776
+ padding: 1.5rem;
777
+ }
778
+
779
+ .batch-chunk-card .card-title {
780
+ font-size: 1rem;
781
+ font-weight: 600;
782
+ color: var(--dark-color);
783
+ }
784
+
785
+ .batch-chunk-card .card-text {
786
+ color: var(--text-muted);
787
+ line-height: 1.6;
788
+ }
789
+
790
+ .download-chunk {
791
+ transition: var(--transition-fast);
792
+ }
793
+
794
+ .download-chunk:hover {
795
+ transform: scale(1.1);
796
+ }
797
+
798
+ /* Enhanced Navigation */
799
+ .navbar {
800
+ backdrop-filter: blur(10px);
801
+ background: rgba(255, 255, 255, 0.95) !important;
802
+ border-bottom: 1px solid rgba(226, 232, 240, 0.8);
803
+ box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
804
+ }
805
+
806
+ .navbar-brand {
807
+ font-weight: 800;
808
+ font-size: 1.5rem;
809
+ color: var(--primary-color) !important;
810
+ transition: var(--transition);
811
+ }
812
+
813
+ .navbar-brand:hover {
814
+ transform: scale(1.05);
815
+ }
816
+
817
+ .navbar-nav .nav-link {
818
+ font-weight: 500;
819
+ transition: var(--transition);
820
+ color: var(--text-color) !important;
821
+ position: relative;
822
+ padding: 0.75rem 1rem !important;
823
+ }
824
+
825
+ .navbar-nav .nav-link::after {
826
+ content: '';
827
+ position: absolute;
828
+ bottom: 0;
829
+ left: 50%;
830
+ width: 0;
831
+ height: 2px;
832
+ background: var(--gradient-primary);
833
+ transition: var(--transition);
834
+ transform: translateX(-50%);
835
+ }
836
+
837
+ .navbar-nav .nav-link:hover::after {
838
+ width: 80%;
839
+ }
840
+
841
+ .navbar-nav .nav-link:hover {
842
+ color: var(--primary-color) !important;
843
+ }
844
+
845
+ .navbar-text {
846
+ color: var(--text-muted) !important;
847
+ font-weight: 500;
848
+ }
849
+
850
+ /* Enhanced Footer */
851
+ .footer {
852
+ background: linear-gradient(135deg, var(--dark-color) 0%, #2d3748 100%);
853
+ color: white;
854
+ padding: 3rem 0 2rem;
855
+ margin-top: 6rem;
856
+ position: relative;
857
+ overflow: hidden;
858
+ }
859
+
860
+ .footer::before {
861
+ content: '';
862
+ position: absolute;
863
+ top: 0;
864
+ left: 0;
865
+ right: 0;
866
+ bottom: 0;
867
+ background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="footer-pattern" width="20" height="20" patternUnits="userSpaceOnUse"><circle cx="10" cy="10" r="0.5" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23footer-pattern)"/></svg>');
868
+ }
869
+
870
+ .footer h5 {
871
+ color: white;
872
+ font-weight: 700;
873
+ margin-bottom: 1rem;
874
+ }
875
+
876
+ .footer p, .footer a {
877
+ color: rgba(255, 255, 255, 0.8);
878
+ transition: var(--transition);
879
+ }
880
+
881
+ .footer a:hover {
882
+ color: white;
883
+ text-decoration: none;
884
+ }
885
+
886
+ /* Enhanced Responsive Design */
887
+ @media (max-width: 1200px) {
888
+ .hero-section {
889
+ padding: 4rem 0;
890
+ }
891
+
892
+ .floating-icon-container {
893
+ width: 250px;
894
+ height: 250px;
895
+ }
896
+
897
+ .floating-icon {
898
+ width: 50px;
899
+ height: 50px;
900
+ font-size: 1.25rem;
901
+ }
902
+
903
+ .hero-main-icon {
904
+ width: 100px;
905
+ height: 100px;
906
+ font-size: 2.5rem;
907
+ }
908
+ }
909
+
910
+ @media (max-width: 992px) {
911
+ .hero-section {
912
+ padding: 3rem 0;
913
+ min-height: auto;
914
+ }
915
+
916
+ .display-3 {
917
+ font-size: 2.5rem;
918
+ }
919
+
920
+ .features-section, .stats-section, .quick-start-section,
921
+ .use-cases-section, .tech-specs-section, .faq-section,
922
+ .final-cta-section {
923
+ padding: 4rem 0;
924
+ }
925
+
926
+ .floating-icon-container {
927
+ display: none;
928
+ }
929
+
930
+ .hero-visual {
931
+ margin-top: 2rem;
932
+ }
933
+ }
934
+
935
+ @media (max-width: 768px) {
936
+ .hero-section {
937
+ padding: 2rem 0;
938
+ text-align: center;
939
+ }
940
+
941
+ .display-3 {
942
+ font-size: 2rem;
943
+ }
944
+
945
+ .lead {
946
+ font-size: 1rem;
947
+ }
948
+
949
+ .btn-lg {
950
+ padding: 0.75rem 1.5rem;
951
+ font-size: 1rem;
952
+ width: 100%;
953
+ margin-bottom: 1rem;
954
+ }
955
+
956
+ .hero-stats .col-4 {
957
+ margin-bottom: 1rem;
958
+ }
959
+
960
+ .stat-item h3 {
961
+ font-size: 2rem;
962
+ }
963
+
964
+ .features-section, .stats-section, .quick-start-section,
965
+ .use-cases-section, .tech-specs-section, .faq-section,
966
+ .final-cta-section {
967
+ padding: 3rem 0;
968
+ }
969
+
970
+ .feature-card-enhanced, .use-case-card, .tech-spec-card {
971
+ margin-bottom: 2rem;
972
+ }
973
+
974
+ .code-card {
975
+ margin-bottom: 1.5rem;
976
+ }
977
+
978
+ .code-header {
979
+ flex-direction: column;
980
+ gap: 1rem;
981
+ text-align: center;
982
+ }
983
+
984
+ .quick-start-cta {
985
+ padding: 2rem 1rem;
986
+ }
987
+
988
+ .cta-buttons .btn {
989
+ width: 100%;
990
+ margin-bottom: 1rem;
991
+ }
992
+
993
+ .navbar-nav {
994
+ text-align: center;
995
+ padding: 1rem 0;
996
+ }
997
+
998
+ .toc {
999
+ position: static;
1000
+ margin-bottom: 2rem;
1001
+ max-height: none;
1002
+ }
1003
+ }
1004
+
1005
+ @media (max-width: 576px) {
1006
+ .container {
1007
+ padding-left: 1rem;
1008
+ padding-right: 1rem;
1009
+ }
1010
+
1011
+ .hero-section {
1012
+ padding: 1.5rem 0;
1013
+ }
1014
+
1015
+ .display-3 {
1016
+ font-size: 1.75rem;
1017
+ }
1018
+
1019
+ .card-body {
1020
+ padding: 1.5rem;
1021
+ }
1022
+
1023
+ .feature-card-enhanced, .use-case-card, .tech-spec-card {
1024
+ padding: 1.5rem;
1025
+ }
1026
+
1027
+ .stat-number {
1028
+ font-size: 2.5rem;
1029
+ }
1030
+
1031
+ .hero-main-icon {
1032
+ width: 80px;
1033
+ height: 80px;
1034
+ font-size: 2rem;
1035
+ }
1036
+
1037
+ .pulse-ring {
1038
+ width: 100px;
1039
+ height: 100px;
1040
+ }
1041
+ }
1042
+
1043
+ /* Enhanced Accessibility */
1044
+ .btn:focus,
1045
+ .form-control:focus,
1046
+ .form-select:focus,
1047
+ .form-check-input:focus {
1048
+ outline: 3px solid rgba(99, 102, 241, 0.3);
1049
+ outline-offset: 2px;
1050
+ }
1051
+
1052
+ .btn:focus-visible,
1053
+ .form-control:focus-visible,
1054
+ .form-select:focus-visible {
1055
+ outline: 3px solid var(--primary-color);
1056
+ outline-offset: 2px;
1057
+ }
1058
+
1059
+ /* Skip to content link for screen readers */
1060
+ .skip-link {
1061
+ position: absolute;
1062
+ top: -40px;
1063
+ left: 6px;
1064
+ background: var(--primary-color);
1065
+ color: white;
1066
+ padding: 8px;
1067
+ text-decoration: none;
1068
+ border-radius: 4px;
1069
+ z-index: 1000;
1070
+ }
1071
+
1072
+ .skip-link:focus {
1073
+ top: 6px;
1074
+ }
1075
+
1076
+ /* Enhanced Animation Classes */
1077
+ .fade-in {
1078
+ animation: fadeIn 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1079
+ }
1080
+
1081
+ @keyframes fadeIn {
1082
+ from {
1083
+ opacity: 0;
1084
+ transform: translateY(10px);
1085
+ }
1086
+ to {
1087
+ opacity: 1;
1088
+ transform: translateY(0);
1089
+ }
1090
+ }
1091
+
1092
+ .slide-up {
1093
+ animation: slideUp 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1094
+ }
1095
+
1096
+ @keyframes slideUp {
1097
+ from {
1098
+ opacity: 0;
1099
+ transform: translateY(30px);
1100
+ }
1101
+ to {
1102
+ opacity: 1;
1103
+ transform: translateY(0);
1104
+ }
1105
+ }
1106
+
1107
+ .scale-in {
1108
+ animation: scaleIn 0.5s cubic-bezier(0.4, 0, 0.2, 1);
1109
+ }
1110
+
1111
+ @keyframes scaleIn {
1112
+ from {
1113
+ opacity: 0;
1114
+ transform: scale(0.9);
1115
+ }
1116
+ to {
1117
+ opacity: 1;
1118
+ transform: scale(1);
1119
+ }
1120
+ }
1121
+
1122
+ /* Enhanced Utility Classes */
1123
+ .text-gradient {
1124
+ background: var(--gradient-primary);
1125
+ -webkit-background-clip: text;
1126
+ -webkit-text-fill-color: transparent;
1127
+ background-clip: text;
1128
+ }
1129
+
1130
+ .text-gradient-secondary {
1131
+ background: var(--gradient-secondary);
1132
+ -webkit-background-clip: text;
1133
+ -webkit-text-fill-color: transparent;
1134
+ background-clip: text;
1135
+ }
1136
+
1137
+ .shadow-custom {
1138
+ box-shadow: var(--box-shadow);
1139
+ }
1140
+
1141
+ .shadow-lg-custom {
1142
+ box-shadow: var(--box-shadow-lg);
1143
+ }
1144
+
1145
+ .shadow-xl-custom {
1146
+ box-shadow: var(--box-shadow-xl);
1147
+ }
1148
+
1149
+ .border-radius-custom {
1150
+ border-radius: var(--border-radius);
1151
+ }
1152
+
1153
+ .bg-gradient-primary {
1154
+ background: var(--gradient-primary);
1155
+ }
1156
+
1157
+ .bg-gradient-secondary {
1158
+ background: var(--gradient-secondary);
1159
+ }
1160
+
1161
+ .bg-gradient-accent {
1162
+ background: var(--gradient-accent);
1163
+ }
1164
+
1165
+ /* Enhanced Progress Indicators */
1166
+ .progress-custom {
1167
+ height: 10px;
1168
+ border-radius: var(--border-radius-sm);
1169
+ background-color: #e2e8f0;
1170
+ overflow: hidden;
1171
+ box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
1172
+ }
1173
+
1174
+ .progress-bar-custom {
1175
+ height: 100%;
1176
+ background: var(--gradient-primary);
1177
+ transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1178
+ position: relative;
1179
+ overflow: hidden;
1180
+ }
1181
+
1182
+ .progress-bar-custom::after {
1183
+ content: '';
1184
+ position: absolute;
1185
+ top: 0;
1186
+ left: 0;
1187
+ right: 0;
1188
+ bottom: 0;
1189
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
1190
+ animation: progress-shimmer 2s infinite;
1191
+ }
1192
+
1193
+ @keyframes progress-shimmer {
1194
+ 0% { transform: translateX(-100%); }
1195
+ 100% { transform: translateX(100%); }
1196
+ }
1197
+
1198
+ /* Enhanced Tooltip */
1199
+ .tooltip-inner {
1200
+ background-color: var(--dark-color);
1201
+ border-radius: var(--border-radius-sm);
1202
+ font-size: 0.875rem;
1203
+ padding: 0.5rem 0.75rem;
1204
+ box-shadow: var(--box-shadow);
1205
+ }
1206
+
1207
+ /* Enhanced Custom Scrollbar */
1208
+ ::-webkit-scrollbar {
1209
+ width: 10px;
1210
+ height: 10px;
1211
+ }
1212
+
1213
+ ::-webkit-scrollbar-track {
1214
+ background: var(--light-gray);
1215
+ border-radius: var(--border-radius-sm);
1216
+ }
1217
+
1218
+ ::-webkit-scrollbar-thumb {
1219
+ background: var(--gradient-primary);
1220
+ border-radius: var(--border-radius-sm);
1221
+ border: 2px solid var(--light-gray);
1222
+ }
1223
+
1224
+ ::-webkit-scrollbar-thumb:hover {
1225
+ background: var(--gradient-secondary);
1226
+ }
1227
+
1228
+ ::-webkit-scrollbar-corner {
1229
+ background: var(--light-gray);
1230
+ }
1231
+
1232
+ /* Print Styles */
1233
+ @media print {
1234
+ .navbar, .footer, .hero-scroll-indicator, .floating-icon-container {
1235
+ display: none !important;
1236
+ }
1237
+
1238
+ .hero-section {
1239
+ background: white !important;
1240
+ color: black !important;
1241
+ padding: 1rem 0 !important;
1242
+ }
1243
+
1244
+ .card {
1245
+ box-shadow: none !important;
1246
+ border: 1px solid #ddd !important;
1247
+ }
1248
+
1249
+ .btn {
1250
+ border: 1px solid #ddd !important;
1251
+ background: white !important;
1252
+ color: black !important;
1253
+ }
1254
+ }
1255
+
1256
+ /* Playground-Specific Styles */
1257
+ .playground-visual {
1258
+ position: relative;
1259
+ display: flex;
1260
+ justify-content: center;
1261
+ align-items: center;
1262
+ height: 200px;
1263
+ }
1264
+
1265
+ .playground-icon {
1266
+ width: 100px;
1267
+ height: 100px;
1268
+ background: rgba(255, 255, 255, 0.15);
1269
+ border-radius: 50%;
1270
+ display: flex;
1271
+ align-items: center;
1272
+ justify-content: center;
1273
+ font-size: 2.5rem;
1274
+ color: white;
1275
+ backdrop-filter: blur(20px);
1276
+ border: 2px solid rgba(255, 255, 255, 0.3);
1277
+ position: relative;
1278
+ }
1279
+
1280
+ .audio-player-container {
1281
+ border: 2px solid #e2e8f0;
1282
+ transition: var(--transition);
1283
+ }
1284
+
1285
+ .audio-player-container:hover {
1286
+ border-color: var(--primary-color);
1287
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
1288
+ }
1289
+
1290
+ .stat-item {
1291
+ padding: 1rem;
1292
+ text-align: center;
1293
+ }
1294
+
1295
+ .stat-item i {
1296
+ font-size: 1.5rem;
1297
+ margin-bottom: 0.5rem;
1298
+ display: block;
1299
+ }
1300
+
1301
+ .stat-value {
1302
+ font-size: 1.25rem;
1303
+ font-weight: 700;
1304
+ color: var(--dark-color);
1305
+ margin-bottom: 0.25rem;
1306
+ }
1307
+
1308
+ .stat-label {
1309
+ font-size: 0.875rem;
1310
+ color: var(--text-muted);
1311
+ font-weight: 500;
1312
+ }
1313
+
1314
+ .card-header {
1315
+ border-bottom: none;
1316
+ border-radius: var(--border-radius) var(--border-radius) 0 0 !important;
1317
+ }
1318
+
1319
+ /* Enhanced Form Controls for Playground */
1320
+ .playground .form-control,
1321
+ .playground .form-select {
1322
+ border: 2px solid #e2e8f0;
1323
+ border-radius: var(--border-radius-sm);
1324
+ padding: 1rem;
1325
+ font-size: 1rem;
1326
+ transition: var(--transition);
1327
+ }
1328
+
1329
+ .playground .form-control:focus,
1330
+ .playground .form-select:focus {
1331
+ border-color: var(--primary-color);
1332
+ box-shadow: 0 0 0 4px rgba(99, 102, 241, 0.1);
1333
+ transform: translateY(-1px);
1334
+ }
1335
+
1336
+ .playground .btn-group .btn {
1337
+ border-radius: var(--border-radius-sm);
1338
+ }
1339
+
1340
+ .playground .btn-group .btn:first-child {
1341
+ border-top-right-radius: 0;
1342
+ border-bottom-right-radius: 0;
1343
+ }
1344
+
1345
+ .playground .btn-group .btn:last-child {
1346
+ border-top-left-radius: 0;
1347
+ border-bottom-left-radius: 0;
1348
+ }
1349
+
1350
+ /* Audio Player Enhancements */
1351
+ audio::-webkit-media-controls-panel {
1352
+ background-color: var(--light-gray);
1353
+ border-radius: var(--border-radius-sm);
1354
+ }
1355
+
1356
+ audio::-webkit-media-controls-play-button,
1357
+ audio::-webkit-media-controls-pause-button {
1358
+ background-color: var(--primary-color);
1359
+ border-radius: 50%;
1360
+ }
1361
+
1362
+ audio::-webkit-media-controls-timeline {
1363
+ background-color: var(--light-gray);
1364
+ border-radius: var(--border-radius-sm);
1365
+ }
1366
+
1367
+ audio::-webkit-media-controls-current-time-display,
1368
+ audio::-webkit-media-controls-time-remaining-display {
1369
+ color: var(--text-color);
1370
+ font-weight: 500;
1371
+ }
1372
+
1373
+ /* Reduced Motion Support */
1374
+ @media (prefers-reduced-motion: reduce) {
1375
+ *,
1376
+ *::before,
1377
+ *::after {
1378
+ animation-duration: 0.01ms !important;
1379
+ animation-iteration-count: 1 !important;
1380
+ transition-duration: 0.01ms !important;
1381
+ }
1382
+
1383
+ .hero-background-animation,
1384
+ .floating-icon,
1385
+ .pulse-ring,
1386
+ .hero-scroll-indicator,
1387
+ .playground-icon {
1388
+ animation: none !important;
1389
+ }
1390
+ }
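Note (editor's sketch, not part of this upload): the reduced-motion rules above cover CSS animations only; scripted effects such as the smooth scrolling in playground.js would still animate. Under that assumption, a minimal JavaScript check with the standard matchMedia API could look like this — the `reduced-motion` body class is hypothetical and only for illustration:

    // Minimal sketch: respect the user's OS-level motion preference in scripted effects.
    const prefersReducedMotion = window.matchMedia('(prefers-reduced-motion: reduce)');

    function scrollToResult(element) {
      // Fall back to an instant jump when reduced motion is requested.
      const behavior = prefersReducedMotion.matches ? 'auto' : 'smooth';
      element.scrollIntoView({ behavior, block: 'nearest' });
    }

    // Keep a body-level flag in sync if the preference changes while the page is open (hypothetical class name).
    prefersReducedMotion.addEventListener('change', () => {
      document.body.classList.toggle('reduced-motion', prefersReducedMotion.matches);
    });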
ttsfm-web/static/js/playground.js ADDED
@@ -0,0 +1,745 @@
1
+ // TTSFM Playground JavaScript
2
+
3
+ // Global variables
4
+ let currentAudioBlob = null;
5
+ let currentFormat = 'mp3';
6
+ let batchResults = [];
7
+
8
+ // Initialize playground
9
+ document.addEventListener('DOMContentLoaded', function() {
10
+ initializePlayground();
11
+ });
12
+
13
+ function initializePlayground() {
14
+ loadVoices();
15
+ loadFormats();
16
+ updateCharCount();
17
+ setupEventListeners();
18
+
19
+ // Initialize tooltips if Bootstrap is available
20
+ if (typeof bootstrap !== 'undefined') {
21
+ const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
22
+ tooltipTriggerList.map(function (tooltipTriggerEl) {
23
+ return new bootstrap.Tooltip(tooltipTriggerEl);
24
+ });
25
+ }
26
+ }
27
+
28
+ function setupEventListeners() {
29
+ // Form and input events
30
+ document.getElementById('text-input').addEventListener('input', updateCharCount);
31
+ document.getElementById('tts-form').addEventListener('submit', generateSpeech);
32
+ document.getElementById('max-length-input').addEventListener('input', updateCharCount);
33
+ document.getElementById('auto-split-check').addEventListener('change', updateGenerateButton);
34
+
35
+ // Enhanced button events
36
+ document.getElementById('validate-text-btn').addEventListener('click', validateText);
37
+ document.getElementById('random-text-btn').addEventListener('click', loadRandomText);
38
+ document.getElementById('download-btn').addEventListener('click', downloadAudio);
39
+ document.getElementById('download-all-btn').addEventListener('click', downloadAllAudio);
40
+
41
+ // New button events
42
+ const clearTextBtn = document.getElementById('clear-text-btn');
43
+ if (clearTextBtn) {
44
+ clearTextBtn.addEventListener('click', clearText);
45
+ }
46
+
47
+
48
+
49
+ const resetFormBtn = document.getElementById('reset-form-btn');
50
+ if (resetFormBtn) {
51
+ resetFormBtn.addEventListener('click', resetForm);
52
+ }
53
+
54
+ const replayBtn = document.getElementById('replay-btn');
55
+ if (replayBtn) {
56
+ replayBtn.addEventListener('click', replayAudio);
57
+ }
58
+
59
+ const shareBtn = document.getElementById('share-btn');
60
+ if (shareBtn) {
61
+ shareBtn.addEventListener('click', shareAudio);
62
+ }
63
+
64
+ // Voice and format selection events
65
+ document.getElementById('voice-select').addEventListener('change', updateVoiceInfo);
66
+ document.getElementById('format-select').addEventListener('change', updateFormatInfo);
67
+
68
+ // Example text buttons
69
+ document.querySelectorAll('.use-example').forEach(button => {
70
+ button.addEventListener('click', function() {
71
+ document.getElementById('text-input').value = this.dataset.text;
72
+ updateCharCount();
73
+ // Add visual feedback
74
+ this.classList.add('btn-success');
75
+ setTimeout(() => {
76
+ this.classList.remove('btn-success');
77
+ this.classList.add('btn-outline-primary');
78
+ }, 1000);
79
+ });
80
+ });
81
+
82
+ // Keyboard shortcuts
83
+ document.addEventListener('keydown', function(e) {
84
+ // Ctrl/Cmd + Enter to generate speech
85
+ if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
86
+ e.preventDefault();
87
+ document.getElementById('generate-btn').click();
88
+ }
89
+
90
+ // Escape to clear results
91
+ if (e.key === 'Escape') {
92
+ clearResults();
93
+ }
94
+ });
95
+ }
96
+
97
+ async function loadVoices() {
98
+ try {
99
+ const response = await fetch('/api/voices');
100
+ const data = await response.json();
101
+
102
+ const select = document.getElementById('voice-select');
103
+ select.innerHTML = '';
104
+
105
+ data.voices.forEach(voice => {
106
+ const option = document.createElement('option');
107
+ option.value = voice.id;
108
+ option.textContent = `${voice.name} - ${voice.description}`;
109
+ select.appendChild(option);
110
+ });
111
+
112
+ // Select default voice
113
+ select.value = 'alloy';
114
+
115
+ } catch (error) {
116
+ console.error('Failed to load voices:', error);
117
+ console.log('Failed to load voices. Please refresh the page.');
118
+ }
119
+ }
120
+
121
+ async function loadFormats() {
122
+ try {
123
+ const response = await fetch('/api/formats');
124
+ const data = await response.json();
125
+
126
+ const select = document.getElementById('format-select');
127
+ select.innerHTML = '';
128
+
129
+ data.formats.forEach(format => {
130
+ const option = document.createElement('option');
131
+ option.value = format.id;
132
+ option.textContent = `${format.name} - ${format.description}`;
133
+ select.appendChild(option);
134
+ });
135
+
136
+ // Select default format
137
+ select.value = 'mp3';
138
+ updateFormatInfo();
139
+
140
+ } catch (error) {
141
+ console.error('Failed to load formats:', error);
142
+ console.log('Failed to load formats. Please refresh the page.');
143
+ }
144
+ }
145
+
146
+ function updateCharCount() {
147
+ const text = document.getElementById('text-input').value;
148
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
149
+ const charCount = text.length;
150
+
151
+ document.getElementById('char-count').textContent = charCount.toLocaleString();
152
+
153
+ // Update length status with better visual feedback
154
+ const statusElement = document.getElementById('length-status');
155
+ const percentage = (charCount / maxLength) * 100;
156
+
157
+ if (charCount > maxLength) {
158
+ statusElement.innerHTML = '<span class="badge bg-danger"><i class="fas fa-exclamation-triangle me-1"></i>Exceeds limit</span>';
159
+ } else if (percentage > 80) {
160
+ statusElement.innerHTML = '<span class="badge bg-warning"><i class="fas fa-exclamation me-1"></i>Near limit</span>';
161
+ } else if (percentage > 50) {
162
+ statusElement.innerHTML = '<span class="badge bg-info"><i class="fas fa-info me-1"></i>Good</span>';
163
+ } else {
164
+ statusElement.innerHTML = '<span class="badge bg-success"><i class="fas fa-check me-1"></i>OK</span>';
165
+ }
166
+
167
+ updateGenerateButton();
168
+ }
169
+
170
+ function updateGenerateButton() {
171
+ const text = document.getElementById('text-input').value;
172
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
173
+ const autoSplit = document.getElementById('auto-split-check').checked;
174
+ const generateBtn = document.getElementById('generate-btn');
175
+ const btnText = generateBtn.querySelector('.btn-text');
176
+
177
+ if (text.length > maxLength && autoSplit) {
178
+ btnText.innerHTML = '<i class="fas fa-layer-group me-2"></i>Generate Speech (Batch Mode)';
179
+ generateBtn.classList.add('btn-warning');
180
+ generateBtn.classList.remove('btn-primary');
181
+ } else {
182
+ btnText.innerHTML = '<i class="fas fa-magic me-2"></i>Generate Speech';
183
+ generateBtn.classList.add('btn-primary');
184
+ generateBtn.classList.remove('btn-warning');
185
+ }
186
+ }
187
+
188
+ async function validateText() {
189
+ const text = document.getElementById('text-input').value.trim();
190
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
191
+
192
+ if (!text) {
193
+ console.log('Please enter some text to validate');
194
+ return;
195
+ }
196
+
197
+ const validateBtn = document.getElementById('validate-text-btn');
198
+ setLoading(validateBtn, true);
199
+
200
+ try {
201
+ const response = await fetch('/api/validate-text', {
202
+ method: 'POST',
203
+ headers: { 'Content-Type': 'application/json' },
204
+ body: JSON.stringify({ text, max_length: maxLength })
205
+ });
206
+
207
+ const data = await response.json();
208
+ const resultDiv = document.getElementById('validation-result');
209
+
210
+ if (data.is_valid) {
211
+ resultDiv.innerHTML = `
212
+ <div class="alert alert-success fade-in">
213
+ <i class="fas fa-check-circle me-2"></i>
214
+ <strong>Text is valid!</strong> (${data.text_length.toLocaleString()} characters)
215
+ <div class="progress progress-custom mt-2">
216
+ <div class="progress-bar-custom" style="width: ${(data.text_length / data.max_length) * 100}%"></div>
217
+ </div>
218
+ </div>
219
+ `;
220
+ } else {
221
+ resultDiv.innerHTML = `
222
+ <div class="alert alert-warning fade-in">
223
+ <i class="fas fa-exclamation-triangle me-2"></i>
224
+ <strong>Text exceeds limit!</strong> (${data.text_length.toLocaleString()}/${data.max_length.toLocaleString()} characters)
225
+ <br><small class="mt-2 d-block">Suggested chunks: ${data.suggested_chunks}</small>
226
+ <div class="mt-3">
227
+ <strong>Preview of chunks:</strong>
228
+ <div class="mt-2">
229
+ ${data.chunk_preview.map((chunk, i) => `
230
+ <div class="border rounded p-2 mb-2 bg-light">
231
+ <small class="text-muted">Chunk ${i+1}:</small>
232
+ <div class="small">${chunk}</div>
233
+ </div>
234
+ `).join('')}
235
+ </div>
236
+ <button class="btn btn-sm btn-outline-primary mt-2" onclick="enableAutoSplit()">
237
+ <i class="fas fa-magic me-1"></i>Enable Auto-Split
238
+ </button>
239
+ </div>
240
+ </div>
241
+ `;
242
+ }
243
+
244
+ resultDiv.classList.remove('d-none');
245
+ resultDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
246
+
247
+ } catch (error) {
248
+ console.error('Validation failed:', error);
249
+ console.log('Failed to validate text. Please try again.');
250
+ } finally {
251
+ setLoading(validateBtn, false);
252
+ }
253
+ }
254
+
255
+ function enableAutoSplit() {
256
+ document.getElementById('auto-split-check').checked = true;
257
+ updateGenerateButton();
258
+ console.log('Auto-split enabled! Click Generate Speech to process in batch mode.');
259
+ }
260
+
261
+ async function generateSpeech(event) {
262
+ event.preventDefault();
263
+
264
+ const button = document.getElementById('generate-btn');
265
+ const audioResult = document.getElementById('audio-result');
266
+ const batchResult = document.getElementById('batch-result');
267
+
268
+ // Get form data
269
+ const formData = getFormData();
270
+
271
+ if (!validateFormData(formData)) {
272
+ return;
273
+ }
274
+
275
+ // Check if we need batch processing
276
+ const needsBatch = formData.text.length > formData.maxLength && formData.autoSplit;
277
+
278
+ // Show loading state
279
+ setLoading(button, true);
280
+ clearResults();
281
+
282
+ try {
283
+ if (needsBatch) {
284
+ await generateBatchSpeech(formData);
285
+ } else {
286
+ await generateSingleSpeech(formData);
287
+ }
288
+ } catch (error) {
289
+ console.error('Generation failed:', error);
290
+ console.log(`Failed to generate speech: ${error.message}`);
291
+ } finally {
292
+ setLoading(button, false);
293
+ }
294
+ }
295
+
296
+ function getFormData() {
297
+ return {
298
+ text: document.getElementById('text-input').value.trim(),
299
+ voice: document.getElementById('voice-select').value,
300
+ format: document.getElementById('format-select').value,
301
+ instructions: document.getElementById('instructions-input').value.trim(),
302
+ maxLength: parseInt(document.getElementById('max-length-input').value) || 4096,
303
+ validateLength: document.getElementById('validate-length-check').checked,
304
+ autoSplit: document.getElementById('auto-split-check').checked
305
+ };
306
+ }
307
+
308
+ function validateFormData(formData) {
309
+ if (!formData.text || !formData.voice || !formData.format) {
310
+ console.log('Please fill in all required fields');
311
+ return false;
312
+ }
313
+
314
+ if (formData.text.length > formData.maxLength && formData.validateLength && !formData.autoSplit) {
315
+ console.log(`Text is too long (${formData.text.length} characters). Enable auto-split or reduce text length.`);
316
+ return false;
317
+ }
318
+
319
+ return true;
320
+ }
321
+
322
+ function clearResults() {
323
+ document.getElementById('audio-result').classList.add('d-none');
324
+ document.getElementById('batch-result').classList.add('d-none');
325
+ document.getElementById('validation-result').classList.add('d-none');
326
+ }
327
+
328
+ // Utility functions
329
+ function setLoading(button, loading) {
330
+ if (loading) {
331
+ button.classList.add('loading');
332
+ button.disabled = true;
333
+ } else {
334
+ button.classList.remove('loading');
335
+ button.disabled = false;
336
+ }
337
+ }
338
+
339
+
340
+
341
+ async function generateSingleSpeech(formData) {
342
+ const audioResult = document.getElementById('audio-result');
343
+
344
+ const response = await fetch('/api/generate', {
345
+ method: 'POST',
346
+ headers: { 'Content-Type': 'application/json' },
347
+ body: JSON.stringify({
348
+ text: formData.text,
349
+ voice: formData.voice,
350
+ format: formData.format,
351
+ instructions: formData.instructions || undefined,
352
+ max_length: formData.maxLength,
353
+ validate_length: formData.validateLength
354
+ })
355
+ });
356
+
357
+ if (!response.ok) {
358
+ const errorData = await response.json();
359
+ throw new Error(errorData.error || `HTTP ${response.status}`);
360
+ }
361
+
362
+ // Get audio data
363
+ const audioBlob = await response.blob();
364
+ currentAudioBlob = audioBlob;
365
+ currentFormat = formData.format;
366
+
367
+ // Create audio URL and setup player
368
+ const audioUrl = URL.createObjectURL(audioBlob);
369
+ const audioPlayer = document.getElementById('audio-player');
370
+ audioPlayer.src = audioUrl;
371
+
372
+ // Use enhanced display function
373
+ displayAudioResult(audioBlob, formData.format, formData.voice, formData.text);
374
+
375
+ console.log('Speech generated successfully! Click play to listen.');
376
+
377
+ // Auto-play if user prefers
378
+ if (localStorage.getItem('autoPlay') === 'true') {
379
+ audioPlayer.play().catch(() => {
380
+ // Auto-play blocked, that's fine
381
+ });
382
+ }
383
+ }
384
+
385
+ async function generateBatchSpeech(formData) {
386
+ const batchResult = document.getElementById('batch-result');
387
+
388
+ const response = await fetch('/api/generate-batch', {
389
+ method: 'POST',
390
+ headers: { 'Content-Type': 'application/json' },
391
+ body: JSON.stringify({
392
+ text: formData.text,
393
+ voice: formData.voice,
394
+ format: formData.format,
395
+ instructions: formData.instructions || undefined,
396
+ max_length: formData.maxLength,
397
+ preserve_words: true
398
+ })
399
+ });
400
+
401
+ if (!response.ok) {
402
+ const errorData = await response.json();
403
+ throw new Error(errorData.error || `HTTP ${response.status}`);
404
+ }
405
+
406
+ const data = await response.json();
407
+ batchResults = data.results;
408
+
409
+ // Update batch summary
410
+ const summaryDiv = document.getElementById('batch-summary');
411
+ summaryDiv.innerHTML = `
412
+ <i class="fas fa-layer-group me-2"></i>
413
+ <strong>Batch Processing Complete!</strong>
414
+ Generated ${data.successful_chunks} of ${data.total_chunks} audio chunks successfully.
415
+ ${data.successful_chunks < data.total_chunks ?
416
+ `<br><small class="text-warning">⚠️ ${data.total_chunks - data.successful_chunks} chunks failed to generate.</small>` :
417
+ '<br><small class="text-success">✅ All chunks generated successfully!</small>'
418
+ }
419
+ `;
420
+
421
+ // Display chunks
422
+ displayBatchChunks(data.results, formData.format);
423
+
424
+ // Show batch result with animation
425
+ batchResult.classList.remove('d-none');
426
+ batchResult.classList.add('fade-in');
427
+
428
+ console.log(`Batch processing completed! Generated ${data.successful_chunks} audio files.`);
429
+ }
430
+
431
+ function displayBatchChunks(results, format) {
432
+ const chunksDiv = document.getElementById('batch-chunks');
433
+ chunksDiv.innerHTML = '';
434
+
435
+ results.forEach((result, index) => {
436
+ const chunkDiv = document.createElement('div');
437
+ chunkDiv.className = 'col-md-6 col-lg-4 mb-3';
438
+
439
+ if (result.audio_data) {
440
+ // Convert base64 to blob
441
+ const audioBlob = base64ToBlob(result.audio_data, result.content_type);
442
+ const audioUrl = URL.createObjectURL(audioBlob);
443
+
444
+ chunkDiv.innerHTML = `
445
+ <div class="card batch-chunk-card h-100">
446
+ <div class="card-body">
447
+ <div class="d-flex justify-content-between align-items-start mb-2">
448
+ <h6 class="card-title mb-0">
449
+ <i class="fas fa-music me-1"></i>Chunk ${result.chunk_index}
450
+ </h6>
451
+ <span class="badge bg-success">
452
+ <i class="fas fa-check me-1"></i>Success
453
+ </span>
454
+ </div>
455
+ <p class="card-text small text-muted mb-3">${result.chunk_text}</p>
456
+ <audio controls class="w-100 mb-3" preload="metadata">
457
+ <source src="${audioUrl}" type="${result.content_type}">
458
+ Your browser does not support audio playback.
459
+ </audio>
460
+ <div class="d-flex justify-content-between align-items-center">
461
+ <small class="text-muted">
462
+ <i class="fas fa-file-audio me-1"></i>
463
+ ${(result.size / 1024).toFixed(1)} KB
464
+ </small>
465
+ <button class="btn btn-sm btn-outline-primary download-chunk"
466
+ data-url="${audioUrl}"
467
+ data-filename="chunk_${result.chunk_index}.${result.format}"
468
+ title="Download this chunk">
469
+ <i class="fas fa-download"></i>
470
+ </button>
471
+ </div>
472
+ </div>
473
+ </div>
474
+ `;
475
+ } else {
476
+ chunkDiv.innerHTML = `
477
+ <div class="card border-danger h-100">
478
+ <div class="card-body">
479
+ <div class="d-flex justify-content-between align-items-start mb-2">
480
+ <h6 class="card-title mb-0 text-danger">
481
+ <i class="fas fa-exclamation-triangle me-1"></i>Chunk ${result.chunk_index}
482
+ </h6>
483
+ <span class="badge bg-danger">
484
+ <i class="fas fa-times me-1"></i>Failed
485
+ </span>
486
+ </div>
487
+ <p class="card-text small text-muted mb-3">${result.chunk_text}</p>
488
+ <div class="alert alert-danger small mb-0">
489
+ <i class="fas fa-exclamation-circle me-1"></i>
490
+ ${result.error}
491
+ </div>
492
+ </div>
493
+ </div>
494
+ `;
495
+ }
496
+
497
+ chunksDiv.appendChild(chunkDiv);
498
+ });
499
+
500
+ // Add download event listeners
501
+ document.querySelectorAll('.download-chunk').forEach(btn => {
502
+ btn.addEventListener('click', function() {
503
+ const url = this.dataset.url;
504
+ const filename = this.dataset.filename;
505
+ downloadFromUrl(url, filename);
506
+
507
+ // Visual feedback
508
+ const icon = this.querySelector('i');
509
+ icon.className = 'fas fa-check';
510
+ setTimeout(() => {
511
+ icon.className = 'fas fa-download';
512
+ }, 1000);
513
+ });
514
+ });
515
+ }
516
+
517
+ function downloadAudio() {
518
+ if (!currentAudioBlob) {
519
+ console.log('No audio to download');
520
+ return;
521
+ }
522
+
523
+ const url = URL.createObjectURL(currentAudioBlob);
524
+ const timestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
525
+ downloadFromUrl(url, `ttsfm-speech-${timestamp}.${currentFormat}`);
526
+ URL.revokeObjectURL(url);
527
+ }
528
+
529
+ function downloadAllAudio() {
530
+ const downloadButtons = document.querySelectorAll('.download-chunk');
531
+ if (downloadButtons.length === 0) {
532
+ console.log('No batch audio files to download');
533
+ return;
534
+ }
535
+
536
+ console.log(`Starting download of ${downloadButtons.length} files...`);
537
+
538
+ downloadButtons.forEach((btn, index) => {
539
+ setTimeout(() => {
540
+ btn.click();
541
+ }, index * 500); // Stagger downloads to avoid browser limits
542
+ });
543
+ }
544
+
545
+ function base64ToBlob(base64, contentType) {
546
+ const byteCharacters = atob(base64);
547
+ const byteNumbers = new Array(byteCharacters.length);
548
+ for (let i = 0; i < byteCharacters.length; i++) {
549
+ byteNumbers[i] = byteCharacters.charCodeAt(i);
550
+ }
551
+ const byteArray = new Uint8Array(byteNumbers);
552
+ return new Blob([byteArray], { type: contentType });
553
+ }
554
+
555
+ function downloadFromUrl(url, filename) {
556
+ const a = document.createElement('a');
557
+ a.href = url;
558
+ a.download = filename;
559
+ a.style.display = 'none';
560
+ document.body.appendChild(a);
561
+ a.click();
562
+ document.body.removeChild(a);
563
+ }
564
+
565
+ // New enhanced functions
566
+ function clearText() {
567
+ document.getElementById('text-input').value = '';
568
+ updateCharCount();
569
+ clearResults();
570
+ console.log('Text cleared successfully');
571
+ }
572
+
573
+ function loadRandomText() {
574
+ const randomTexts = [
575
+ // News & Information
576
+ "Breaking news: Scientists have discovered a revolutionary new method for generating incredibly natural synthetic speech using advanced neural networks and machine learning algorithms.",
577
+ "Weather update: Today will be partly cloudy with temperatures reaching 75 degrees Fahrenheit. Light winds from the southwest at 5 to 10 miles per hour.",
578
+ "Technology report: The latest advancements in artificial intelligence are revolutionizing how we interact with digital devices and services.",
579
+
580
+ // Educational & Informative
581
+ "The human brain contains approximately 86 billion neurons, each connected to thousands of others, creating a complex network that enables consciousness, memory, and thought.",
582
+ "Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen, forming the foundation of most life on Earth.",
583
+ "The speed of light in a vacuum is exactly 299,792,458 meters per second, making it one of the fundamental constants of physics.",
584
+
585
+ // Creative & Storytelling
586
+ "Once upon a time, in a land far away, there lived a wise old wizard who could speak to the stars and understand their ancient secrets.",
587
+ "The mysterious lighthouse stood alone on the rocky cliff, its beacon cutting through the fog like a sword of light, guiding lost ships safely home.",
588
+ "In the depths of the enchanted forest, where sunbeams danced through emerald leaves, a young adventurer discovered a hidden path to destiny.",
589
+
590
+ // Business & Professional
591
+ "Our quarterly results demonstrate strong growth across all market segments, with revenue increasing by 23% compared to the same period last year.",
592
+ "The new product launch exceeded expectations, capturing 15% market share within the first six months and establishing our brand as an industry leader.",
593
+ "We are committed to sustainable business practices that benefit our customers, employees, and the environment for generations to come.",
594
+
595
+ // Technical & Programming
596
+ "The TTSFM package provides a comprehensive API for text-to-speech generation with support for multiple voices and audio formats.",
597
+ "Machine learning algorithms process vast amounts of data to identify patterns and make predictions with remarkable accuracy.",
598
+ "Cloud computing has transformed how businesses store, process, and access their data, enabling scalability and flexibility like never before.",
599
+
600
+ // Conversational & Casual
601
+ "Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices.",
602
+ "Good morning! Today is a beautiful day to learn something new and explore the possibilities of text-to-speech technology.",
603
+ "Have you ever wondered what it would be like if your computer could speak with perfect human-like intonation and emotion?"
604
+ ];
605
+
606
+ const randomText = randomTexts[Math.floor(Math.random() * randomTexts.length)];
607
+ document.getElementById('text-input').value = randomText;
608
+ updateCharCount();
609
+ console.log('Random text loaded successfully');
610
+ }
611
+
612
+
613
+
614
+ function resetForm() {
615
+ // Reset form to default values
616
+ document.getElementById('text-input').value = 'Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices. Generate natural, expressive speech for any application.';
617
+ document.getElementById('voice-select').value = 'alloy';
618
+ document.getElementById('format-select').value = 'mp3';
619
+ document.getElementById('instructions-input').value = '';
620
+ document.getElementById('max-length-input').value = '4096';
621
+ document.getElementById('validate-length-check').checked = true;
622
+ document.getElementById('auto-split-check').checked = false;
623
+
624
+ updateCharCount();
625
+ updateGenerateButton();
626
+ clearResults();
627
+ console.log('Form reset to default values');
628
+ }
629
+
630
+ function replayAudio() {
631
+ const audioPlayer = document.getElementById('audio-player');
632
+ if (audioPlayer && audioPlayer.src) {
633
+ audioPlayer.currentTime = 0;
634
+ audioPlayer.play().catch(() => {
635
+ console.log('Unable to replay audio. Please check your browser settings.');
636
+ });
637
+ }
638
+ }
639
+
640
+ function shareAudio() {
641
+ if (navigator.share && currentAudioBlob) {
642
+ const file = new File([currentAudioBlob], `ttsfm-speech.${currentFormat}`, {
643
+ type: `audio/${currentFormat}`
644
+ });
645
+
646
+ navigator.share({
647
+ title: 'TTSFM Generated Speech',
648
+ text: 'Check out this speech generated with TTSFM!',
649
+ files: [file]
650
+ }).catch(() => {
651
+ // Fallback to copying link
652
+ copyAudioLink();
653
+ });
654
+ } else {
655
+ copyAudioLink();
656
+ }
657
+ }
658
+
659
+ function copyAudioLink() {
660
+ const audioPlayer = document.getElementById('audio-player');
661
+ if (audioPlayer && audioPlayer.src) {
662
+ navigator.clipboard.writeText(audioPlayer.src).then(() => {
663
+ console.log('Audio link copied to clipboard!');
664
+ }).catch(() => {
665
+ console.log('Unable to copy link. Please try downloading the audio instead.');
666
+ });
667
+ }
668
+ }
669
+
670
+ function updateVoiceInfo() {
671
+ const voiceSelect = document.getElementById('voice-select');
672
+ const previewBtn = document.getElementById('preview-voice-btn');
673
+
674
+ if (voiceSelect.value) {
675
+ previewBtn.disabled = false;
676
+ previewBtn.onclick = () => previewVoice(voiceSelect.value);
677
+ } else {
678
+ previewBtn.disabled = true;
679
+ }
680
+ }
681
+
682
+ function updateFormatInfo() {
683
+ const formatSelect = document.getElementById('format-select');
684
+ const formatInfo = document.getElementById('format-info');
685
+
686
+ const formatDescriptions = {
687
+ 'mp3': '🎵 MP3 - Good quality, small file size. Best for web and general use.',
688
+ 'opus': '📻 OPUS - Excellent quality, small file size. Best for streaming and VoIP.',
689
+ 'aac': '📱 AAC - Good quality, medium file size. Best for Apple devices and streaming.',
690
+ 'flac': '💿 FLAC - Lossless quality, large file size. Best for archival and high-quality audio.',
691
+ 'wav': '🎧 WAV - Lossless quality, large file size. Best for professional audio production.',
692
+ 'pcm': '🔊 PCM - Raw audio data, large file size. Best for audio processing.'
693
+ };
694
+
695
+ if (formatInfo && formatSelect.value) {
696
+ formatInfo.textContent = formatDescriptions[formatSelect.value] || 'High-quality audio format';
697
+ }
698
+ }
699
+
700
+ function previewVoice(voiceId) {
701
+ // This would typically play a short preview of the voice
702
+ console.log(`Voice preview for ${voiceId} - Feature coming soon!`);
703
+ }
704
+
705
+ // Enhanced audio result display
706
+ function displayAudioResult(audioBlob, format, voice, text) {
707
+ const audioResult = document.getElementById('audio-result');
708
+ const audioPlayer = document.getElementById('audio-player');
709
+ const audioInfo = document.getElementById('audio-info');
710
+
711
+ // Create audio URL and setup player
712
+ const audioUrl = URL.createObjectURL(audioBlob);
713
+ audioPlayer.src = audioUrl;
714
+
715
+ // Update audio stats
716
+ const sizeKB = (audioBlob.size / 1024).toFixed(1);
717
+ document.getElementById('audio-size').textContent = `${sizeKB} KB`;
718
+ document.getElementById('audio-format').textContent = format.toUpperCase();
719
+ document.getElementById('audio-voice').textContent = voice.charAt(0).toUpperCase() + voice.slice(1);
720
+
721
+ // Update audio info
722
+ audioInfo.innerHTML = `
723
+ <i class="fas fa-check-circle text-success me-1"></i>
724
+ Generated successfully • ${sizeKB} KB • ${format.toUpperCase()}
725
+ `;
726
+
727
+ // Show result with animation
728
+ audioResult.classList.remove('d-none');
729
+ audioResult.classList.add('fade-in');
730
+
731
+ // Update duration when metadata loads
732
+ audioPlayer.addEventListener('loadedmetadata', function() {
733
+ const duration = Math.round(audioPlayer.duration);
734
+ document.getElementById('audio-duration').textContent = `${duration}s`;
735
+ }, { once: true });
736
+
737
+ // Scroll to result
738
+ audioResult.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
739
+ }
740
+
741
+ // Export functions for use in HTML
742
+ window.enableAutoSplit = enableAutoSplit;
743
+ window.clearText = clearText;
744
+ window.loadRandomText = loadRandomText;
745
+ window.resetForm = resetForm;
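Note (editor's sketch, not part of this upload): playground.js reports user-facing feedback such as 'Text cleared successfully' and validation errors through console.log, so nothing is shown on the page itself. If visible notifications are wanted, those calls could route through a small helper like the one below; it uses only standard DOM APIs plus the Bootstrap alert classes already styled in style.css, and the function name showToast is an assumption for illustration:

    // Minimal sketch: toast-style notification that the console.log feedback calls could be routed through.
    function showToast(message, type = 'info', timeoutMs = 4000) {
      const toast = document.createElement('div');
      toast.className = `alert alert-${type} shadow-custom`;  // reuses the alert styling defined in style.css
      toast.style.cssText = 'position:fixed;bottom:1rem;right:1rem;z-index:2000;max-width:320px;';
      toast.textContent = message;                            // textContent avoids injecting HTML
      document.body.appendChild(toast);
      setTimeout(() => toast.remove(), timeoutMs);            // auto-dismiss
    }

    // Example usage: showToast('Speech generated successfully!', 'success');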
ttsfm-web/templates/base.html ADDED
@@ -0,0 +1,349 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{% block title %}TTSFM - Text-to-Speech{% endblock %}</title>
7
+
8
+ <!-- Bootstrap CSS -->
9
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <!-- Font Awesome -->
12
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
13
+
14
+ <!-- Google Fonts -->
15
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
16
+
17
+ <!-- Custom CSS -->
18
+ <link href="{{ url_for('static', filename='css/style.css') }}" rel="stylesheet">
19
+
20
+ <!-- Additional Performance Optimizations -->
21
+ <link rel="preconnect" href="https://fonts.googleapis.com">
22
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
23
+
24
+ <!-- Favicon -->
25
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🎤</text></svg>">
26
+
27
+ <!-- Meta tags for better SEO and social sharing -->
28
+ <meta name="description" content="TTSFM - A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
29
+ <meta name="keywords" content="text-to-speech, TTS, python, API, voice synthesis, audio generation">
30
+ <meta name="author" content="TTSFM">
31
+
32
+ <!-- Open Graph / Facebook -->
33
+ <meta property="og:type" content="website">
34
+ <meta property="og:url" content="{{ request.url }}">
35
+ <meta property="og:title" content="{% block og_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
36
+ <meta property="og:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
37
+
38
+ <!-- Twitter -->
39
+ <meta name="twitter:card" content="summary">
40
+ <meta name="twitter:url" content="{{ request.url }}">
41
+ <meta name="twitter:title" content="{% block twitter_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
42
+ <meta name="twitter:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
43
+
44
+ {% block extra_css %}{% endblock %}
45
+ </head>
46
+ <body>
47
+ <!-- Skip to content link for accessibility -->
48
+ <a href="#main-content" class="skip-link">Skip to main content</a>
49
+
50
+ <!-- Clean Navigation -->
51
+ <nav class="navbar navbar-expand-lg fixed-top" style="background-color: rgba(255, 255, 255, 0.95); backdrop-filter: blur(10px); border-bottom: 1px solid #e5e7eb;">
52
+ <div class="container">
53
+ <a class="navbar-brand" href="{{ url_for('index') }}">
54
+ <i class="fas fa-microphone-alt me-2"></i>
55
+ <span class="fw-bold">TTSFM</span>
56
+ <span class="badge bg-primary ms-2 small">v3.0</span>
57
+ </a>
58
+
59
+ <button class="navbar-toggler border-0" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
60
+ <span class="navbar-toggler-icon"></span>
61
+ </button>
62
+
63
+ <div class="collapse navbar-collapse" id="navbarNav">
64
+ <ul class="navbar-nav me-auto">
65
+ <li class="nav-item">
66
+ <a class="nav-link" href="{{ url_for('index') }}" aria-label="Home page">
67
+ <i class="fas fa-home me-1"></i>Home
68
+ </a>
69
+ </li>
70
+ <li class="nav-item">
71
+ <a class="nav-link" href="{{ url_for('playground') }}" aria-label="Interactive playground">
72
+ <i class="fas fa-play me-1"></i>Playground
73
+ </a>
74
+ </li>
75
+ <li class="nav-item">
76
+ <a class="nav-link" href="{{ url_for('docs') }}" aria-label="API documentation">
77
+ <i class="fas fa-book me-1"></i>Documentation
78
+ </a>
79
+ </li>
80
+ </ul>
81
+
82
+ <ul class="navbar-nav">
83
+ <li class="nav-item">
84
+ <span class="navbar-text d-flex align-items-center">
85
+ <span id="status-indicator" class="status-indicator status-offline" aria-hidden="true"></span>
86
+ <span id="status-text" class="small">Checking...</span>
87
+ </span>
88
+ </li>
89
+ <li class="nav-item ms-2">
90
+ <a class="btn btn-outline-primary btn-sm" href="https://github.com/dbccccccc/ttsfm" target="_blank" rel="noopener noreferrer" aria-label="View source code on GitHub">
91
+ <i class="fab fa-github me-1"></i>GitHub
92
+ </a>
93
+ </li>
94
+ </ul>
95
+ </div>
96
+ </div>
97
+ </nav>
98
+
99
+ <!-- Main Content -->
100
+ <main id="main-content" style="padding-top: 76px;">
101
+ {% block content %}{% endblock %}
102
+ </main>
103
+
104
+ <!-- Simplified Footer -->
105
+ <footer class="footer py-4" style="background-color: #f8fafc; border-top: 1px solid #e5e7eb;" role="contentinfo">
106
+ <div class="container">
107
+ <div class="row align-items-center">
108
+ <div class="col-md-6">
109
+ <div class="d-flex align-items-center mb-2 mb-md-0">
110
+ <i class="fas fa-microphone-alt me-2 text-primary"></i>
111
+ <strong class="text-dark">TTSFM</strong>
112
+ <span class="ms-2 text-muted">Free Text-to-Speech for Python</span>
113
+ </div>
114
+ </div>
115
+ <div class="col-md-6 text-md-end">
116
+ <div class="d-flex justify-content-md-end gap-3">
117
+ <a href="{{ url_for('playground') }}" class="text-decoration-none" style="color: #6b7280;">
118
+ <i class="fas fa-play me-1"></i>Demo
119
+ </a>
120
+ <a href="{{ url_for('docs') }}" class="text-decoration-none" style="color: #6b7280;">
121
+ <i class="fas fa-book me-1"></i>Docs
122
+ </a>
123
+ <a href="https://github.com/dbccccccc/ttsfm" class="text-decoration-none" style="color: #6b7280;" target="_blank" rel="noopener noreferrer">
124
+ <i class="fab fa-github me-1"></i>GitHub
125
+ </a>
126
+ </div>
127
+ </div>
128
+ </div>
129
+ <hr class="my-3" style="border-color: #e5e7eb;">
130
+ <div class="row align-items-center">
131
+ <div class="col-md-6">
132
+ <small class="text-muted">&copy; 2024 TTSFM. MIT License.</small>
133
+ </div>
134
+ <div class="col-md-6 text-md-end">
135
+ <small class="text-muted">
136
+ <span id="footer-status" class="d-inline-flex align-items-center">
137
+ <span class="status-indicator status-offline me-2"></span>
138
+ Status: <span id="footer-status-text" class="ms-1">Checking...</span>
139
+ </span>
140
+ </small>
141
+ </div>
142
+ </div>
143
+ </div>
144
+ </footer>
145
+
146
+ <!-- Bootstrap JS -->
147
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
148
+
149
+ <!-- Enhanced Common JavaScript -->
150
+ <script>
151
+ // Enhanced service status checking
152
+ async function checkStatus() {
153
+ try {
154
+ const response = await fetch('/api/health');
155
+ const data = await response.json();
156
+
157
+ const indicator = document.getElementById('status-indicator');
158
+ const text = document.getElementById('status-text');
159
+ const footerIndicator = document.querySelector('#footer-status .status-indicator');
160
+ const footerText = document.getElementById('footer-status-text');
161
+
162
+ if (response.ok && data.status === 'healthy') {
163
+ // Update navbar status
164
+ indicator.className = 'status-indicator status-online';
165
+ text.textContent = 'Online';
166
+
167
+ // Update footer status
168
+ if (footerIndicator) footerIndicator.className = 'status-indicator status-online';
169
+ if (footerText) footerText.textContent = 'Online';
170
+ } else {
171
+ // Update navbar status
172
+ indicator.className = 'status-indicator status-offline';
173
+ text.textContent = 'Offline';
174
+
175
+ // Update footer status
176
+ if (footerIndicator) footerIndicator.className = 'status-indicator status-offline';
177
+ if (footerText) footerText.textContent = 'Offline';
178
+ }
179
+ } catch (error) {
180
+ // Update navbar status
181
+ const indicator = document.getElementById('status-indicator');
182
+ const text = document.getElementById('status-text');
183
+ indicator.className = 'status-indicator status-offline';
184
+ text.textContent = 'Offline';
185
+
186
+ // Update footer status
187
+ const footerIndicator = document.querySelector('#footer-status .status-indicator');
188
+ const footerText = document.getElementById('footer-status-text');
189
+ if (footerIndicator) footerIndicator.className = 'status-indicator status-offline';
190
+ if (footerText) footerText.textContent = 'Offline';
191
+ }
192
+ }
193
+
194
+ // Enhanced page initialization
195
+ document.addEventListener('DOMContentLoaded', function() {
196
+ // Check status immediately and periodically
197
+ checkStatus();
198
+ setInterval(checkStatus, 30000); // Check every 30 seconds
199
+
200
+ // Initialize tooltips
201
+ if (typeof bootstrap !== 'undefined') {
202
+ const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
203
+ tooltipTriggerList.map(function (tooltipTriggerEl) {
204
+ return new bootstrap.Tooltip(tooltipTriggerEl);
205
+ });
206
+ }
207
+
208
+ // Add smooth scrolling for anchor links
209
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
210
+ anchor.addEventListener('click', function (e) {
211
+ const target = document.querySelector(this.getAttribute('href'));
212
+ if (target) {
213
+ e.preventDefault();
214
+ target.scrollIntoView({
215
+ behavior: 'smooth',
216
+ block: 'start'
217
+ });
218
+ }
219
+ });
220
+ });
221
+
222
+ // Add fade-in animation to main content
223
+ const mainContent = document.querySelector('main');
224
+ if (mainContent) {
225
+ mainContent.classList.add('fade-in');
226
+ }
227
+
228
+ // Add loading states to external links
229
+ document.querySelectorAll('a[target="_blank"]').forEach(link => {
230
+ link.addEventListener('click', function() {
231
+ this.style.opacity = '0.7';
232
+ setTimeout(() => {
233
+ this.style.opacity = '1';
234
+ }, 1000);
235
+ });
236
+ });
237
+ });
238
+
239
+ // Enhanced utility function to show loading state
240
+ function setLoading(button, loading) {
241
+ if (loading) {
242
+ button.classList.add('loading');
243
+ button.disabled = true;
244
+ button.style.cursor = 'wait';
245
+ } else {
246
+ button.classList.remove('loading');
247
+ button.disabled = false;
248
+ button.style.cursor = 'pointer';
249
+ }
250
+ }
251
+
252
+ // Enhanced utility function to show alerts
253
+ function showAlert(message, type = 'info', duration = 5000) {
254
+ const alertDiv = document.createElement('div');
255
+ alertDiv.className = `alert alert-${type} alert-dismissible fade show fade-in`;
256
+ alertDiv.style.position = 'relative';
257
+ alertDiv.style.zIndex = '1050';
258
+ alertDiv.innerHTML = `
259
+ <i class="fas fa-${getAlertIcon(type)} me-2"></i>
260
+ ${message}
261
+ <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
262
+ `;
263
+
264
+ // Find the best container to insert the alert
265
+ const container = document.querySelector('main .container') || document.querySelector('.container') || document.body;
266
+ if (container) {
267
+ container.insertBefore(alertDiv, container.firstChild);
268
+
269
+ // Auto-dismiss after specified duration
270
+ setTimeout(() => {
271
+ if (alertDiv.parentNode) {
272
+ alertDiv.classList.remove('show');
273
+ setTimeout(() => {
274
+ if (alertDiv.parentNode) {
275
+ alertDiv.remove();
276
+ }
277
+ }, 150);
278
+ }
279
+ }, duration);
280
+
281
+ // Scroll to alert if it's not visible
282
+ alertDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
283
+ }
284
+ }
285
+
286
+ // Helper function to get appropriate icon for alert type
287
+ function getAlertIcon(type) {
288
+ const icons = {
289
+ 'success': 'check-circle',
290
+ 'danger': 'exclamation-triangle',
291
+ 'warning': 'exclamation-triangle',
292
+ 'info': 'info-circle',
293
+ 'primary': 'info-circle'
294
+ };
295
+ return icons[type] || 'info-circle';
296
+ }
297
+
298
+ // Enhanced error handling for fetch requests
299
+ async function safeFetch(url, options = {}) {
300
+ try {
301
+ const response = await fetch(url, options);
302
+ if (!response.ok) {
303
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
304
+ }
305
+ return response;
306
+ } catch (error) {
307
+ console.error('Fetch error:', error);
308
+ showAlert(`Network error: ${error.message}`, 'danger');
309
+ throw error;
310
+ }
311
+ }
312
+
313
+ // Performance monitoring
314
+ window.addEventListener('load', function() {
315
+ // Log page load time
316
+ const loadTime = performance.now();
317
+ console.log(`Page loaded in ${Math.round(loadTime)}ms`);
318
+
319
+ // Check for slow loading resources
320
+ if (loadTime > 3000) {
321
+ console.warn('Page load time is slow. Consider optimizing resources.');
322
+ }
323
+ });
324
+
325
+ // Keyboard shortcuts
326
+ document.addEventListener('keydown', function(e) {
327
+ // Alt + H for home
328
+ if (e.altKey && e.key === 'h') {
329
+ e.preventDefault();
330
+ window.location.href = '{{ url_for("index") }}';
331
+ }
332
+
333
+ // Alt + P for playground
334
+ if (e.altKey && e.key === 'p') {
335
+ e.preventDefault();
336
+ window.location.href = '{{ url_for("playground") }}';
337
+ }
338
+
339
+ // Alt + D for docs
340
+ if (e.altKey && e.key === 'd') {
341
+ e.preventDefault();
342
+ window.location.href = '{{ url_for("docs") }}';
343
+ }
344
+ });
345
+ </script>
346
+
347
+ {% block extra_js %}{% endblock %}
348
+ </body>
349
+ </html>
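
Note: the status badge in this template is driven by the `/api/health` endpoint, which the inline JavaScript expects to return JSON containing `"status": "healthy"`. A minimal Python sketch of the same check (assuming the `requests` package is installed and a hypothetical `BASE_URL` for your deployment) could look like this:

```python
# Minimal health-check sketch mirroring the template's checkStatus() JS.
# BASE_URL is a hypothetical address; point it at your TTSFM web app.
import requests

BASE_URL = "http://localhost:8000"

def service_is_healthy(base_url: str = BASE_URL) -> bool:
    """Return True if /api/health reports a 'healthy' status."""
    try:
        resp = requests.get(f"{base_url}/api/health", timeout=5)
        return resp.ok and resp.json().get("status") == "healthy"
    except (requests.RequestException, ValueError):
        return False

if __name__ == "__main__":
    print("Online" if service_is_healthy() else "Offline")
```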
ttsfm-web/templates/docs.html ADDED
@@ -0,0 +1,369 @@
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM API Documentation{% endblock %}
4
+
5
+ {% block extra_css %}
6
+ <style>
7
+ .code-block {
8
+ background-color: #f8f9fa;
9
+ border: 1px solid #e9ecef;
10
+ border-radius: 0.375rem;
11
+ padding: 1rem;
12
+ margin: 1rem 0;
13
+ overflow-x: auto;
14
+ }
15
+
16
+ .endpoint-card {
17
+ border-left: 4px solid #007bff;
18
+ margin-bottom: 2rem;
19
+ }
20
+
21
+ .method-badge {
22
+ font-size: 0.75rem;
23
+ padding: 0.25rem 0.5rem;
24
+ border-radius: 0.25rem;
25
+ font-weight: bold;
26
+ margin-right: 0.5rem;
27
+ }
28
+
29
+ .method-get { background-color: #28a745; color: white; }
30
+ .method-post { background-color: #007bff; color: white; }
31
+ .method-put { background-color: #ffc107; color: black; }
32
+ .method-delete { background-color: #dc3545; color: white; }
33
+
34
+ .response-example {
35
+ background-color: #f1f3f4;
36
+ border-radius: 0.375rem;
37
+ padding: 1rem;
38
+ margin-top: 1rem;
39
+ }
40
+
41
+ .toc {
42
+ position: sticky;
43
+ top: 2rem;
44
+ max-height: calc(100vh - 4rem);
45
+ overflow-y: auto;
46
+ }
47
+
48
+ .toc a {
49
+ color: #6c757d;
50
+ text-decoration: none;
51
+ display: block;
52
+ padding: 0.25rem 0;
53
+ border-left: 2px solid transparent;
54
+ padding-left: 1rem;
55
+ }
56
+
57
+ .toc a:hover, .toc a.active {
58
+ color: #007bff;
59
+ border-left-color: #007bff;
60
+ }
61
+ </style>
62
+ {% endblock %}
63
+
64
+ {% block content %}
65
+ <div class="container py-5">
66
+ <div class="row">
67
+ <div class="col-12 text-center mb-5">
68
+ <h1 class="display-4 fw-bold">
69
+ <i class="fas fa-book me-3"></i>API Documentation
70
+ </h1>
71
+ <p class="lead text-muted">
72
+ Complete reference for the TTSFM Text-to-Speech API
73
+ </p>
74
+ </div>
75
+ </div>
76
+
77
+ <div class="row">
78
+ <!-- Table of Contents -->
79
+ <div class="col-lg-3">
80
+ <div class="toc">
81
+ <h5 class="fw-bold mb-3">Contents</h5>
82
+ <a href="#overview">Overview</a>
83
+ <a href="#authentication">Authentication</a>
84
+ <a href="#text-validation">Text Validation</a>
85
+ <a href="#endpoints">API Endpoints</a>
86
+ <a href="#voices">Voices</a>
87
+ <a href="#formats">Audio Formats</a>
88
+ <a href="#generate">Generate Speech</a>
89
+ <a href="#batch">Batch Processing</a>
90
+ <a href="#status">Status & Health</a>
91
+ <a href="#errors">Error Handling</a>
92
+ <a href="#examples">Code Examples</a>
93
+ <a href="#python-package">Python Package</a>
94
+ </div>
95
+ </div>
96
+
97
+ <!-- Documentation Content -->
98
+ <div class="col-lg-9">
99
+ <!-- Overview -->
100
+ <section id="overview" class="mb-5">
101
+ <h2 class="fw-bold mb-3">Overview</h2>
102
+ <p>
103
+ The TTSFM API provides a modern, OpenAI-compatible interface for text-to-speech generation.
104
+ It supports multiple voices, audio formats, and includes advanced features like text length
105
+ validation and batch processing.
106
+ </p>
107
+
108
+ <div class="alert alert-info">
109
+ <i class="fas fa-info-circle me-2"></i>
110
+ <strong>Base URL:</strong> <code>{{ request.url_root }}api/</code>
111
+ </div>
112
+
113
+ <h4>Key Features</h4>
114
+ <ul>
115
+ <li>11 different voice options</li>
116
+ <li>Multiple audio formats (MP3, WAV, OPUS, etc.)</li>
117
+ <li>Text length validation (4096 character limit)</li>
118
+ <li>Automatic text splitting for long content</li>
119
+ <li>Batch processing capabilities</li>
120
+ <li>Real-time status monitoring</li>
121
+ </ul>
122
+ </section>
123
+
124
+ <!-- Authentication -->
125
+ <section id="authentication" class="mb-5">
126
+ <h2 class="fw-bold mb-3">Authentication</h2>
127
+ <p>
128
+ Currently, the API supports optional API key authentication. If configured,
129
+ include your API key in the request headers.
130
+ </p>
131
+
132
+ <div class="code-block">
133
+ <pre><code>Authorization: Bearer YOUR_API_KEY</code></pre>
134
+ </div>
135
+ </section>
136
+
137
+ <!-- Text Validation -->
138
+ <section id="text-validation" class="mb-5">
139
+ <h2 class="fw-bold mb-3">Text Length Validation</h2>
140
+ <p>
141
+ TTSFM includes built-in text length validation to ensure compatibility with TTS models.
142
+ The default maximum length is 4096 characters, but this can be customized.
143
+ </p>
144
+
145
+ <div class="alert alert-warning">
146
+ <i class="fas fa-exclamation-triangle me-2"></i>
147
+ <strong>Important:</strong> Text exceeding the maximum length will be rejected unless
148
+ validation is disabled or the text is split into chunks.
149
+ </div>
150
+
151
+ <h4>Validation Options</h4>
152
+ <ul>
153
+ <li><code>max_length</code>: Maximum allowed characters (default: 4096)</li>
154
+ <li><code>validate_length</code>: Enable/disable validation (default: true)</li>
155
+ <li><code>preserve_words</code>: Avoid splitting words when chunking (default: true)</li>
156
+ </ul>
157
+ </section>
158
+
159
+ <!-- API Endpoints -->
160
+ <section id="endpoints" class="mb-5">
161
+ <h2 class="fw-bold mb-3">API Endpoints</h2>
162
+
163
+ <!-- Voices Endpoint -->
164
+ <div class="card endpoint-card" id="voices">
165
+ <div class="card-body">
166
+ <h4 class="card-title">
167
+ <span class="method-badge method-get">GET</span>
168
+ /api/voices
169
+ </h4>
170
+ <p class="card-text">Get list of available voices.</p>
171
+
172
+ <h6>Response Example:</h6>
173
+ <div class="response-example">
174
+ <pre><code>{
175
+ "voices": [
176
+ {
177
+ "id": "alloy",
178
+ "name": "Alloy",
179
+ "description": "Alloy voice"
180
+ },
181
+ {
182
+ "id": "echo",
183
+ "name": "Echo",
184
+ "description": "Echo voice"
185
+ }
186
+ ],
187
+ "count": 6
188
+ }</code></pre>
189
+ </div>
190
+ </div>
191
+ </div>
192
+
193
+ <!-- Formats Endpoint -->
194
+ <div class="card endpoint-card" id="formats">
195
+ <div class="card-body">
196
+ <h4 class="card-title">
197
+ <span class="method-badge method-get">GET</span>
198
+ /api/formats
199
+ </h4>
200
+ <p class="card-text">Get list of supported audio formats.</p>
201
+
202
+ <h6>Response Example:</h6>
203
+ <div class="response-example">
204
+ <pre><code>{
205
+ "formats": [
206
+ {
207
+ "id": "mp3",
208
+ "name": "MP3",
209
+ "mime_type": "audio/mp3",
210
+ "description": "MP3 audio format"
211
+ }
212
+ ],
213
+ "count": 6
214
+ }</code></pre>
215
+ </div>
216
+ </div>
217
+ </div>
218
+
219
+ <!-- Text Validation Endpoint -->
220
+ <div class="card endpoint-card">
221
+ <div class="card-body">
222
+ <h4 class="card-title">
223
+ <span class="method-badge method-post">POST</span>
224
+ /api/validate-text
225
+ </h4>
226
+ <p class="card-text">Validate text length and get splitting suggestions.</p>
227
+
228
+ <h6>Request Body:</h6>
229
+ <div class="code-block">
230
+ <pre><code>{
231
+ "text": "Your text to validate",
232
+ "max_length": 4096
233
+ }</code></pre>
234
+ </div>
235
+
236
+ <h6>Response Example:</h6>
237
+ <div class="response-example">
238
+ <pre><code>{
239
+ "text_length": 5000,
240
+ "max_length": 4096,
241
+ "is_valid": false,
242
+ "needs_splitting": true,
243
+ "suggested_chunks": 2,
244
+ "chunk_preview": [
245
+ "First chunk preview...",
246
+ "Second chunk preview..."
247
+ ]
248
+ }</code></pre>
249
+ </div>
250
+ </div>
251
+ </div>
252
+
253
+ <!-- Generate Speech Endpoint -->
254
+ <div class="card endpoint-card" id="generate">
255
+ <div class="card-body">
256
+ <h4 class="card-title">
257
+ <span class="method-badge method-post">POST</span>
258
+ /api/generate
259
+ </h4>
260
+ <p class="card-text">Generate speech from text.</p>
261
+
262
+ <h6>Request Body:</h6>
263
+ <div class="code-block">
264
+ <pre><code>{
265
+ "text": "Hello, world!",
266
+ "voice": "alloy",
267
+ "format": "mp3",
268
+ "instructions": "Speak cheerfully",
269
+ "max_length": 4096,
270
+ "validate_length": true
271
+ }</code></pre>
272
+ </div>
273
+
274
+ <h6>Parameters:</h6>
275
+ <ul>
276
+ <li><code>text</code> (required): Text to convert to speech</li>
277
+ <li><code>voice</code> (optional): Voice ID (default: "alloy")</li>
278
+ <li><code>format</code> (optional): Audio format (default: "mp3")</li>
279
+ <li><code>instructions</code> (optional): Voice modulation instructions</li>
280
+ <li><code>max_length</code> (optional): Maximum text length (default: 4096)</li>
281
+ <li><code>validate_length</code> (optional): Enable validation (default: true)</li>
282
+ </ul>
283
+
284
+ <h6>Response:</h6>
285
+ <p>Returns audio file with appropriate Content-Type header.</p>
286
+ </div>
287
+ </div>
288
+
289
+ <!-- Batch Processing Endpoint -->
290
+ <div class="card endpoint-card" id="batch">
291
+ <div class="card-body">
292
+ <h4 class="card-title">
293
+ <span class="method-badge method-post">POST</span>
294
+ /api/generate-batch
295
+ </h4>
296
+ <p class="card-text">Generate speech from long text by automatically splitting into chunks.</p>
297
+
298
+ <h6>Request Body:</h6>
299
+ <div class="code-block">
300
+ <pre><code>{
301
+ "text": "Very long text that exceeds the limit...",
302
+ "voice": "alloy",
303
+ "format": "mp3",
304
+ "max_length": 4096,
305
+ "preserve_words": true
306
+ }</code></pre>
307
+ </div>
308
+
309
+ <h6>Response Example:</h6>
310
+ <div class="response-example">
311
+ <pre><code>{
312
+ "total_chunks": 3,
313
+ "successful_chunks": 3,
314
+ "results": [
315
+ {
316
+ "chunk_index": 1,
317
+ "chunk_text": "First chunk text...",
318
+ "audio_data": "base64_encoded_audio",
319
+ "content_type": "audio/mp3",
320
+ "size": 12345,
321
+ "format": "mp3"
322
+ }
323
+ ]
324
+ }</code></pre>
325
+ </div>
326
+ </div>
327
+ </div>
328
+ </section>
329
+ </div>
330
+ </div>
331
+ </div>
332
+ {% endblock %}
333
+
334
+ {% block extra_js %}
335
+ <script>
336
+ // Smooth scrolling for TOC links
337
+ document.querySelectorAll('.toc a').forEach(link => {
338
+ link.addEventListener('click', function(e) {
339
+ e.preventDefault();
340
+ const target = document.querySelector(this.getAttribute('href'));
341
+ if (target) {
342
+ target.scrollIntoView({ behavior: 'smooth' });
343
+
344
+ // Update active link
345
+ document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
346
+ this.classList.add('active');
347
+ }
348
+ });
349
+ });
350
+
351
+ // Highlight current section in TOC
352
+ window.addEventListener('scroll', function() {
353
+ const sections = document.querySelectorAll('section[id]');
354
+ const scrollPos = window.scrollY + 100;
355
+
356
+ sections.forEach(section => {
357
+ const top = section.offsetTop;
358
+ const bottom = top + section.offsetHeight;
359
+ const id = section.getAttribute('id');
360
+ const link = document.querySelector(`.toc a[href="#${id}"]`);
361
+
362
+ if (scrollPos >= top && scrollPos < bottom) {
363
+ document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
364
+ if (link) link.classList.add('active');
365
+ }
366
+ });
367
+ });
368
+ </script>
369
+ {% endblock %}
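
Note: as a companion to the endpoint reference above, here is a hedged Python sketch of calling the documented `POST /api/generate` endpoint directly over HTTP. It assumes the `requests` package and a hypothetical `BASE_URL`; the request fields and the raw-audio response follow the contract described in the documentation page itself.

```python
# Sketch of calling /api/generate as documented above (not the official client).
import requests

BASE_URL = "http://localhost:8000"  # hypothetical; point at your TTSFM web app

payload = {
    "text": "Hello, world!",
    "voice": "alloy",
    "format": "mp3",
    "validate_length": True,
}

resp = requests.post(f"{BASE_URL}/api/generate", json=payload, timeout=60)
resp.raise_for_status()

# The endpoint returns raw audio with an appropriate Content-Type header.
with open("hello.mp3", "wb") as f:
    f.write(resp.content)
print("Saved hello.mp3 with content type", resp.headers.get("Content-Type"))
```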
ttsfm-web/templates/index.html ADDED
@@ -0,0 +1,146 @@
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM - Free Text-to-Speech for Python{% endblock %}
4
+
5
+ {% block content %}
6
+ <!-- Hero Section -->
7
+ <section class="hero-section">
8
+ <div class="container">
9
+ <div class="row align-items-center min-vh-75">
10
+ <div class="col-lg-8 mx-auto text-center">
11
+ <div class="hero-content">
12
+ <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
+ <i class="fas fa-code me-2"></i>Python Package
14
+ </div>
15
+ <h1 class="display-4 fw-bold mb-4">
16
+ Free Text-to-Speech for Python
17
+ </h1>
18
+ <p class="lead mb-4">
19
+ Access free text-to-speech using openai.fm's service. No API keys required,
20
+ just install and use immediately.
21
+ </p>
22
+ <div class="d-flex flex-wrap gap-3 justify-content-center">
23
+ <a href="{{ url_for('playground') }}" class="btn btn-primary btn-lg">
24
+ <i class="fas fa-play me-2"></i>Try Demo
25
+ </a>
26
+ <a href="{{ url_for('docs') }}" class="btn btn-outline-secondary btn-lg">
27
+ <i class="fas fa-book me-2"></i>Documentation
28
+ </a>
29
+ <a href="https://github.com/dbccccccc/ttsfm" class="btn btn-outline-secondary btn-lg" target="_blank" rel="noopener noreferrer">
30
+ <i class="fab fa-github me-2"></i>GitHub
31
+ </a>
32
+ </div>
33
+ </div>
34
+ </div>
35
+ </div>
36
+ </div>
37
+ </section>
38
+
39
+ <!-- Features Section -->
40
+ <section class="py-5" style="background-color: #f8fafc;">
41
+ <div class="container">
42
+ <div class="row">
43
+ <div class="col-12 text-center mb-5">
44
+ <h2 class="fw-bold mb-4">Key Features</h2>
45
+ <p class="lead text-muted">
46
+ Simple, free, and powerful text-to-speech for Python developers.
47
+ </p>
48
+ </div>
49
+ </div>
50
+
51
+ <div class="row g-4">
52
+ <div class="col-lg-4">
53
+ <div class="text-center">
54
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #2563eb;">
55
+ <i class="fas fa-key"></i>
56
+ </div>
57
+ <h5 class="fw-bold">No API Keys</h5>
58
+ <p class="text-muted">Completely free service with no registration or API keys required.</p>
59
+ </div>
60
+ </div>
61
+
62
+ <div class="col-lg-4">
63
+ <div class="text-center">
64
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #10b981;">
65
+ <i class="fas fa-bolt"></i>
66
+ </div>
67
+ <h5 class="fw-bold">Easy to Use</h5>
68
+ <p class="text-muted">Simple Python API with both sync and async support for all use cases.</p>
69
+ </div>
70
+ </div>
71
+
72
+ <div class="col-lg-4">
73
+ <div class="text-center">
74
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #64748b;">
75
+ <i class="fas fa-microphone-alt"></i>
76
+ </div>
77
+ <h5 class="fw-bold">Multiple Voices</h5>
78
+ <p class="text-muted">Access to various voice options and audio formats for your needs.</p>
79
+ </div>
80
+ </div>
81
+ </div>
82
+ </div>
83
+ </section>
84
+
85
+ <!-- Quick Start Section -->
86
+ <section class="py-5">
87
+ <div class="container">
88
+ <div class="row">
89
+ <div class="col-12 text-center mb-5">
90
+ <h2 class="fw-bold mb-4">Getting Started</h2>
91
+ <p class="lead text-muted">
92
+ Install TTSFM and start generating speech with just a few lines of code.
93
+ </p>
94
+ </div>
95
+ </div>
96
+
97
+ <div class="row g-4">
98
+ <div class="col-lg-6">
99
+ <div class="card h-100">
100
+ <div class="card-body">
101
+ <h5 class="card-title">
102
+ <i class="fas fa-download me-2 text-primary"></i>Installation
103
+ </h5>
104
+ <pre class="bg-light p-3 rounded"><code>pip install ttsfm</code></pre>
105
+ <small class="text-muted">Requires Python 3.8+</small>
106
+ </div>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="col-lg-6">
111
+ <div class="card h-100">
112
+ <div class="card-body">
113
+ <h5 class="card-title">
114
+ <i class="fas fa-play me-2 text-success"></i>Basic Usage
115
+ </h5>
116
+ <pre class="bg-light p-3 rounded"><code>from ttsfm import TTSClient
117
+
118
+ client = TTSClient()
119
+ response = client.generate_speech(
120
+ text="Hello, world!",
121
+ voice="alloy"
122
+ )
123
+ response.save_to_file("hello.wav")</code></pre>
124
+ <small class="text-muted">No API keys required</small>
125
+ </div>
126
+ </div>
127
+ </div>
128
+ </div>
129
+
130
+ <div class="row mt-4">
131
+ <div class="col-12 text-center">
132
+ <div class="d-flex justify-content-center gap-3 flex-wrap">
133
+ <a href="{{ url_for('playground') }}" class="btn btn-primary">
134
+ <i class="fas fa-play me-2"></i>Try Demo
135
+ </a>
136
+ <a href="{{ url_for('docs') }}" class="btn btn-outline-primary">
137
+ <i class="fas fa-book me-2"></i>Documentation
138
+ </a>
139
+ </div>
140
+ </div>
141
+ </div>
142
+ </div>
143
+ </section>
144
+
145
+
146
+ {% endblock %}
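
Note: the quick-start card above shows synchronous usage; a rough async counterpart, assuming the `AsyncTTSClient` added later in this commit (see `ttsfm/async_client.py` below), might look like this:

```python
# Async usage sketch; mirrors the sync quick-start example above.
import asyncio
from ttsfm import AsyncTTSClient

async def main() -> None:
    # The client is an async context manager and owns the HTTP session.
    async with AsyncTTSClient() as client:
        response = await client.generate_speech(
            text="Hello from the async client!",
            voice="alloy",
        )
        # Per the package docstring, the file extension is chosen from the
        # format actually returned by the service.
        response.save_to_file("hello_async")

asyncio.run(main())
```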
ttsfm-web/templates/playground.html ADDED
@@ -0,0 +1,295 @@
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM Playground - Try Text-to-Speech{% endblock %}
4
+
5
+ {% block content %}
6
+ <!-- Clean Playground Header -->
7
+ <section class="py-5" style="background-color: white; border-bottom: 1px solid #e5e7eb;">
8
+ <div class="container">
9
+ <div class="row align-items-center">
10
+ <div class="col-lg-8">
11
+ <div class="fade-in">
12
+ <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
+ <i class="fas fa-flask me-2"></i>Demo
14
+ </div>
15
+ <h1 class="display-4 fw-bold mb-3 text-dark">
16
+ <i class="fas fa-play-circle me-3 text-primary"></i>TTS Playground
17
+ </h1>
18
+ <p class="lead mb-4 text-muted">
19
+ Test the TTSFM text-to-speech functionality with different voices and formats.
20
+ </p>
21
+ </div>
22
+ </div>
23
+ <div class="col-lg-4 text-center">
24
+ <div class="playground-visual fade-in" style="animation-delay: 0.3s;">
25
+ <div class="playground-icon">
26
+ <i class="fas fa-waveform-lines text-primary"></i>
27
+ <div class="pulse-ring"></div>
28
+ <div class="pulse-ring pulse-ring-delay"></div>
29
+ </div>
30
+ </div>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ </section>
35
+
36
+ <div class="container py-5 playground">
37
+
38
+ <div class="row">
39
+ <div class="col-lg-10 mx-auto">
40
+ <div class="card shadow-lg-custom border-0 fade-in">
41
+ <div class="card-header bg-gradient-primary text-white">
42
+ <h4 class="mb-0 d-flex align-items-center">
43
+ <i class="fas fa-microphone me-2"></i>
44
+ Text-to-Speech Generator
45
+ </h4>
46
+ </div>
47
+ <div class="card-body p-4">
48
+ <form id="tts-form">
49
+ <!-- Enhanced Text Input -->
50
+ <div class="mb-4">
51
+ <label for="text-input" class="form-label fw-bold d-flex align-items-center">
52
+ <i class="fas fa-edit me-2 text-primary"></i>
53
+ Text to Convert
54
+ </label>
55
+ <div class="position-relative">
56
+ <textarea
57
+ class="form-control shadow-sm"
58
+ id="text-input"
59
+ rows="4"
60
+ placeholder="Enter the text you want to convert to speech..."
61
+ required
62
+ >Hello! This is a test of the TTSFM text-to-speech system.</textarea>
63
+ <div class="position-absolute top-0 end-0 p-2">
64
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="clear-text-btn" title="Clear text">
65
+ <i class="fas fa-times"></i>
66
+ </button>
67
+ </div>
68
+ </div>
69
+ <div class="form-text d-flex justify-content-between align-items-center">
70
+ <div class="d-flex align-items-center gap-3">
71
+ <span class="text-muted">
72
+ <i class="fas fa-keyboard me-1"></i>
73
+ <span id="char-count">0</span> characters
74
+ </span>
75
+ <span id="length-status" class=""></span>
76
+ <span class="text-muted small">
77
+ <i class="fas fa-lightbulb me-1"></i>
78
+ Tip: Use Ctrl+Enter to generate
79
+ </span>
80
+ </div>
81
+ <div class="btn-group" role="group">
82
+ <button type="button" class="btn btn-sm btn-outline-primary" id="validate-text-btn">
83
+ <i class="fas fa-check me-1"></i>Validate
84
+ </button>
85
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="random-text-btn">
86
+ <i class="fas fa-dice me-1"></i>Random
87
+ </button>
88
+ </div>
89
+ </div>
90
+ <div id="validation-result" class="mt-2 d-none"></div>
91
+ </div>
92
+
93
+ <div class="row">
94
+ <!-- Enhanced Voice Selection -->
95
+ <div class="col-md-6 mb-4">
96
+ <label for="voice-select" class="form-label fw-bold d-flex align-items-center">
97
+ <i class="fas fa-microphone me-2 text-primary"></i>
98
+ Voice
99
+ </label>
100
+ <select class="form-select shadow-sm" id="voice-select" required>
101
+ <option value="">Loading voices...</option>
102
+ </select>
103
+ <div class="form-text">
104
+ <span>Choose from available voices</span>
105
+ </div>
106
+ </div>
107
+
108
+ <!-- Enhanced Format Selection -->
109
+ <div class="col-md-6 mb-4">
110
+ <label for="format-select" class="form-label fw-bold d-flex align-items-center">
111
+ <i class="fas fa-file-audio me-2 text-primary"></i>
112
+ Audio Format
113
+ </label>
114
+ <select class="form-select shadow-sm" id="format-select" required>
115
+ <option value="">Loading formats...</option>
116
+ </select>
117
+ <div class="form-text">
118
+ <span>Select your preferred audio format</span>
119
+ </div>
120
+ </div>
121
+ </div>
122
+
123
+ <!-- Advanced Options -->
124
+ <div class="row">
125
+ <div class="col-md-6 mb-4">
126
+ <label for="max-length-input" class="form-label fw-bold">
127
+ <i class="fas fa-ruler me-2"></i>Max Length
128
+ </label>
129
+ <input
130
+ type="number"
131
+ class="form-control"
132
+ id="max-length-input"
133
+ value="4096"
134
+ min="100"
135
+ max="10000"
136
+ >
137
+ <div class="form-text">
138
+ Maximum characters per request (default: 4096)
139
+ </div>
140
+ </div>
141
+
142
+ <div class="col-md-6 mb-4">
143
+ <label class="form-label fw-bold">
144
+ <i class="fas fa-cog me-2"></i>Options
145
+ </label>
146
+ <div class="form-check">
147
+ <input class="form-check-input" type="checkbox" id="validate-length-check" checked>
148
+ <label class="form-check-label" for="validate-length-check">
149
+ Enable length validation
150
+ </label>
151
+ </div>
152
+ <div class="form-check">
153
+ <input class="form-check-input" type="checkbox" id="auto-split-check">
154
+ <label class="form-check-label" for="auto-split-check">
155
+ Auto-split long text
156
+ </label>
157
+ </div>
158
+ </div>
159
+ </div>
160
+
161
+ <!-- Instructions (Optional) -->
162
+ <div class="mb-4">
163
+ <label for="instructions-input" class="form-label fw-bold">
164
+ <i class="fas fa-magic me-2"></i>Instructions (Optional)
165
+ </label>
166
+ <input
167
+ type="text"
168
+ class="form-control"
169
+ id="instructions-input"
170
+ placeholder="e.g., Speak in a cheerful and upbeat tone"
171
+ >
172
+ <div class="form-text">
173
+ Provide optional instructions for voice modulation
174
+ </div>
175
+ </div>
176
+
177
+ <!-- Enhanced Generate Button -->
178
+ <div class="text-center mb-4">
179
+ <div class="d-grid gap-2 d-md-block">
180
+ <button type="submit" class="btn btn-primary btn-lg px-4 py-3" id="generate-btn">
181
+ <span class="btn-text">
182
+ <i class="fas fa-magic me-2"></i>Generate Speech
183
+ </span>
184
+ <span class="loading-spinner">
185
+ <i class="fas fa-spinner fa-spin me-2"></i>Generating...
186
+ </span>
187
+ </button>
188
+ <button type="button" class="btn btn-outline-secondary btn-lg ms-md-3" id="reset-form-btn">
189
+ <i class="fas fa-redo me-2"></i>Reset
190
+ </button>
191
+ </div>
192
+ </div>
193
+ </form>
194
+
195
+ <!-- Enhanced Audio Player -->
196
+ <div id="audio-result" class="d-none">
197
+ <div class="border-top pt-4 mt-4">
198
+ <div class="d-flex align-items-center justify-content-between mb-3">
199
+ <h5 class="mb-0 d-flex align-items-center">
200
+ <i class="fas fa-volume-up me-2 text-success"></i>
201
+ Generated Audio
202
+ <span class="badge bg-success ms-2">
203
+ <i class="fas fa-check me-1"></i>Ready
204
+ </span>
205
+ </h5>
206
+ <div class="btn-group" role="group">
207
+ <button type="button" class="btn btn-sm btn-outline-primary" id="replay-btn" title="Replay audio">
208
+ <i class="fas fa-redo"></i>
209
+ </button>
210
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="share-btn" title="Share audio">
211
+ <i class="fas fa-share"></i>
212
+ </button>
213
+ </div>
214
+ </div>
215
+
216
+ <div class="audio-player-container bg-light rounded p-3 mb-3">
217
+ <audio controls class="audio-player w-100" id="audio-player" preload="metadata">
218
+ Your browser does not support the audio element.
219
+ </audio>
220
+ <div class="audio-controls mt-2 d-flex justify-content-between align-items-center">
221
+ <div class="audio-info">
222
+ <span id="audio-info" class="text-muted small"></span>
223
+ </div>
224
+ <div class="audio-actions">
225
+ <button type="button" class="btn btn-success btn-sm" id="download-btn">
226
+ <i class="fas fa-download me-1"></i>Download
227
+ </button>
228
+ </div>
229
+ </div>
230
+ </div>
231
+
232
+ <div class="audio-stats row text-center">
233
+ <div class="col-md-3 col-6">
234
+ <div class="stat-item">
235
+ <i class="fas fa-clock text-primary"></i>
236
+ <div class="stat-value" id="audio-duration">--</div>
237
+ <div class="stat-label">Duration</div>
238
+ </div>
239
+ </div>
240
+ <div class="col-md-3 col-6">
241
+ <div class="stat-item">
242
+ <i class="fas fa-file text-info"></i>
243
+ <div class="stat-value" id="audio-size">--</div>
244
+ <div class="stat-label">File Size</div>
245
+ </div>
246
+ </div>
247
+ <div class="col-md-3 col-6">
248
+ <div class="stat-item">
249
+ <i class="fas fa-microphone text-warning"></i>
250
+ <div class="stat-value" id="audio-voice">--</div>
251
+ <div class="stat-label">Voice</div>
252
+ </div>
253
+ </div>
254
+ <div class="col-md-3 col-6">
255
+ <div class="stat-item">
256
+ <i class="fas fa-music text-success"></i>
257
+ <div class="stat-value" id="audio-format">--</div>
258
+ <div class="stat-label">Format</div>
259
+ </div>
260
+ </div>
261
+ </div>
262
+ </div>
263
+ </div>
264
+
265
+ <!-- Batch Results -->
266
+ <div id="batch-result" class="d-none">
267
+ <hr>
268
+ <h5 class="mb-3">
269
+ <i class="fas fa-layer-group me-2"></i>Batch Processing Results
270
+ </h5>
271
+ <div class="alert alert-info" id="batch-summary"></div>
272
+ <div id="batch-chunks" class="row g-3"></div>
273
+ <div class="mt-3">
274
+ <button type="button" class="btn btn-outline-primary" id="download-all-btn">
275
+ <i class="fas fa-download me-2"></i>Download All Audio Files
276
+ </button>
277
+ </div>
278
+ </div>
279
+ </div>
280
+ </div>
281
+ </div>
282
+ </div>
283
+ </div>
284
+ {% endblock %}
285
+
286
+ {% block extra_js %}
287
+ <!-- Playground JavaScript -->
288
+ <script src="{{ url_for('static', filename='js/playground.js') }}"></script>
289
+ <script>
290
+ // Additional playground-specific functionality
291
+ console.log('TTSFM Playground loaded successfully!');
292
+
293
+
294
+ </script>
295
+ {% endblock %}
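
Note: the playground's "Auto-split long text" option corresponds to the `/api/generate-batch` contract documented in `docs.html` above, where each chunk carries base64-encoded audio. A hedged sketch of consuming that response from Python (assuming the `requests` package and a hypothetical `BASE_URL`):

```python
# Sketch of the batch flow behind the playground's auto-split option.
import base64
import requests

BASE_URL = "http://localhost:8000"  # hypothetical; point at your TTSFM web app

payload = {
    "text": "Very long text that exceeds the limit... " * 500,
    "voice": "alloy",
    "format": "mp3",
    "max_length": 4096,
    "preserve_words": True,
}

resp = requests.post(f"{BASE_URL}/api/generate-batch", json=payload, timeout=300)
resp.raise_for_status()
data = resp.json()

# Each result entry contains base64-encoded audio that can be written out.
for chunk in data["results"]:
    filename = f"chunk_{chunk['chunk_index']}.{chunk['format']}"
    with open(filename, "wb") as f:
        f.write(base64.b64decode(chunk["audio_data"]))

print(f"Wrote {data['successful_chunks']} of {data['total_chunks']} chunks")
```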
ttsfm/__init__.py ADDED
@@ -0,0 +1,183 @@
1
+ """
2
+ TTSFM - Text-to-Speech for Free using OpenAI.fm
3
+
4
+ A Python library for generating high-quality text-to-speech audio using the free OpenAI.fm service.
5
+ Supports multiple voices and audio formats with a simple, intuitive API.
6
+
7
+ Features:
8
+ - 🎤 6 premium AI voices (alloy, echo, fable, nova, onyx, shimmer)
9
+ - 🎵 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
10
+ - 🚀 Fast and reliable speech generation
11
+ - 📝 Comprehensive text processing and validation
12
+ - 🔄 Automatic retry with exponential backoff
13
+ - 📊 Detailed response metadata and statistics
14
+ - 🌐 Both synchronous and asynchronous APIs
15
+ - 🎯 OpenAI-compatible API format
16
+ - 🔧 Smart format optimization for best quality
17
+
18
+ Audio Format Support:
19
+ - MP3: Good quality, small file size - ideal for web and general use
20
+ - WAV: Lossless quality, large file size - ideal for professional use
21
+ - OPUS: High-quality compressed audio - ideal for streaming
22
+ - AAC: Advanced audio codec - ideal for mobile devices
23
+ - FLAC: Lossless compression - ideal for archival
24
+ - PCM: Raw audio data - ideal for processing
25
+
26
+ Example:
27
+ >>> from ttsfm import TTSClient, Voice, AudioFormat
28
+ >>>
29
+ >>> client = TTSClient()
30
+ >>>
31
+ >>> # Generate MP3 audio
32
+ >>> mp3_response = client.generate_speech(
33
+ ... text="Hello, world!",
34
+ ... voice=Voice.ALLOY,
35
+ ... response_format=AudioFormat.MP3
36
+ ... )
37
+ >>> mp3_response.save_to_file("hello") # Saves as hello.mp3
38
+ >>>
39
+ >>> # Generate WAV audio
40
+ >>> wav_response = client.generate_speech(
41
+ ... text="High quality audio",
42
+ ... voice=Voice.NOVA,
43
+ ... response_format=AudioFormat.WAV
44
+ ... )
45
+ >>> wav_response.save_to_file("audio") # Saves as audio.wav
46
+ >>>
47
+ >>> # Generate OPUS audio
48
+ >>> opus_response = client.generate_speech(
49
+ ... text="Compressed audio",
50
+ ... voice=Voice.ECHO,
51
+ ... response_format=AudioFormat.OPUS
52
+ ... )
53
+ >>> opus_response.save_to_file("compressed") # Saves as compressed.wav
54
+ """
55
+
56
+ from .client import TTSClient
57
+ from .async_client import AsyncTTSClient
58
+ from .models import (
59
+ TTSRequest,
60
+ TTSResponse,
61
+ Voice,
62
+ AudioFormat,
63
+ TTSError,
64
+ APIError,
65
+ NetworkError,
66
+ ValidationError
67
+ )
68
+ from .exceptions import (
69
+ TTSException,
70
+ APIException,
71
+ NetworkException,
72
+ ValidationException,
73
+ RateLimitException,
74
+ AuthenticationException
75
+ )
76
+ from .utils import (
77
+ validate_text_length,
78
+ split_text_by_length
79
+ )
80
+
81
+ __version__ = "3.0.0"
82
+ __author__ = "dbcccc"
83
+ __email__ = "[email protected]"
84
+ __description__ = "Text-to-Speech API Client with OpenAI compatibility"
85
+ __url__ = "https://github.com/dbccccccc/ttsfm"
86
+
87
+ # Default client instance for convenience
88
+ default_client = None
89
+
90
+ def create_client(base_url: str = None, api_key: str = None, **kwargs) -> TTSClient:
91
+ """
92
+ Create a new TTS client instance.
93
+
94
+ Args:
95
+ base_url: Base URL for the TTS service
96
+ api_key: API key for authentication (if required)
97
+ **kwargs: Additional client configuration
98
+
99
+ Returns:
100
+ TTSClient: Configured client instance
101
+ """
102
+ return TTSClient(base_url=base_url, api_key=api_key, **kwargs)
103
+
104
+ def create_async_client(base_url: str = None, api_key: str = None, **kwargs) -> AsyncTTSClient:
105
+ """
106
+ Create a new async TTS client instance.
107
+
108
+ Args:
109
+ base_url: Base URL for the TTS service
110
+ api_key: API key for authentication (if required)
111
+ **kwargs: Additional client configuration
112
+
113
+ Returns:
114
+ AsyncTTSClient: Configured async client instance
115
+ """
116
+ return AsyncTTSClient(base_url=base_url, api_key=api_key, **kwargs)
117
+
118
+ def set_default_client(client: TTSClient) -> None:
119
+ """Set the default client instance for convenience functions."""
120
+ global default_client
121
+ default_client = client
122
+
123
+ def generate_speech(text: str, voice: str = "alloy", **kwargs) -> TTSResponse:
124
+ """
125
+ Convenience function to generate speech using the default client.
126
+
127
+ Args:
128
+ text: Text to convert to speech
129
+ voice: Voice to use for generation
130
+ **kwargs: Additional generation parameters
131
+
132
+ Returns:
133
+ TTSResponse: Generated audio response from the default client
134
+
135
+ Raises:
136
+ TTSException: If no default client is set or generation fails
137
+ """
138
+ if default_client is None:
139
+ raise TTSException("No default client set. Use create_client() first.")
140
+
141
+ return default_client.generate_speech(text=text, voice=voice, **kwargs)
142
+
143
+ # Export all public components
144
+ __all__ = [
145
+ # Main classes
146
+ "TTSClient",
147
+ "AsyncTTSClient",
148
+
149
+ # Models
150
+ "TTSRequest",
151
+ "TTSResponse",
152
+ "Voice",
153
+ "AudioFormat",
154
+ "TTSError",
155
+ "APIError",
156
+ "NetworkError",
157
+ "ValidationError",
158
+
159
+ # Exceptions
160
+ "TTSException",
161
+ "APIException",
162
+ "NetworkException",
163
+ "ValidationException",
164
+ "RateLimitException",
165
+ "AuthenticationException",
166
+
167
+ # Factory functions
168
+ "create_client",
169
+ "create_async_client",
170
+ "set_default_client",
171
+ "generate_speech",
172
+
173
+ # Utility functions
174
+ "validate_text_length",
175
+ "split_text_by_length",
176
+
177
+ # Package metadata
178
+ "__version__",
179
+ "__author__",
180
+ "__email__",
181
+ "__description__",
182
+ "__url__"
183
+ ]
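
Note: a short usage sketch of the convenience helpers defined above, under the assumption that the module-level `generate_speech()` simply returns whatever the registered default client's `generate_speech()` returns (a `TTSResponse`, per the package docstring examples):

```python
# Sketch: create a client once, register it as the default, reuse it everywhere.
from ttsfm import create_client, set_default_client, generate_speech

client = create_client()          # defaults to the public openai.fm backend
set_default_client(client)

# Delegates to the registered default client.
response = generate_speech("Hello from the convenience API!", voice="alloy")
response.save_to_file("hello")    # extension chosen from the returned format
```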
ttsfm/async_client.py ADDED
@@ -0,0 +1,464 @@
1
+ """
2
+ Asynchronous TTS client implementation.
3
+
4
+ This module provides the AsyncTTSClient class for asynchronous
5
+ text-to-speech generation with OpenAI-compatible API.
6
+ """
7
+
8
+ import json
9
+ import uuid
10
+ import asyncio
11
+ import logging
12
+ from typing import Optional, Dict, Any, Union, List
13
+
14
+ import aiohttp
15
+ from aiohttp import ClientTimeout, ClientSession
16
+
17
+ from .models import (
18
+ TTSRequest, TTSResponse, Voice, AudioFormat,
19
+ get_content_type, get_format_from_content_type
20
+ )
21
+ from .exceptions import (
22
+ TTSException, APIException, NetworkException, ValidationException,
23
+ create_exception_from_response
24
+ )
25
+ from .utils import (
26
+ get_realistic_headers, sanitize_text, validate_url, build_url,
27
+ exponential_backoff, estimate_audio_duration, format_file_size,
28
+ validate_text_length, split_text_by_length
29
+ )
30
+
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ class AsyncTTSClient:
36
+ """
37
+ Asynchronous TTS client for text-to-speech generation.
38
+
39
+ This client provides an async interface for generating speech from text
40
+ using OpenAI-compatible TTS services with support for concurrent requests.
41
+
42
+ Attributes:
43
+ base_url: Base URL for the TTS service
44
+ api_key: API key for authentication (if required)
45
+ timeout: Request timeout in seconds
46
+ max_retries: Maximum number of retry attempts
47
+ verify_ssl: Whether to verify SSL certificates
48
+ max_concurrent: Maximum concurrent requests
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ base_url: str = "https://www.openai.fm",
54
+ api_key: Optional[str] = None,
55
+ timeout: float = 30.0,
56
+ max_retries: int = 3,
57
+ verify_ssl: bool = True,
58
+ max_concurrent: int = 10,
59
+ **kwargs
60
+ ):
61
+ """
62
+ Initialize the async TTS client.
63
+
64
+ Args:
65
+ base_url: Base URL for the TTS service
66
+ api_key: API key for authentication
67
+ timeout: Request timeout in seconds
68
+ max_retries: Maximum retry attempts
69
+ verify_ssl: Whether to verify SSL certificates
70
+ max_concurrent: Maximum concurrent requests
71
+ **kwargs: Additional configuration options
72
+ """
73
+ self.base_url = base_url.rstrip('/')
74
+ self.api_key = api_key
75
+ self.timeout = timeout
76
+ self.max_retries = max_retries
77
+ self.verify_ssl = verify_ssl
78
+ self.max_concurrent = max_concurrent
79
+
80
+ # Validate base URL
81
+ if not validate_url(self.base_url):
82
+ raise ValidationException(f"Invalid base URL: {self.base_url}")
83
+
84
+ # Session will be created when needed
85
+ self._session: Optional[ClientSession] = None
86
+ self._semaphore = asyncio.Semaphore(max_concurrent)
87
+
88
+ logger.info(f"Initialized async TTS client with base URL: {self.base_url}")
89
+
90
+ async def __aenter__(self):
91
+ """Async context manager entry."""
92
+ await self._ensure_session()
93
+ return self
94
+
95
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
96
+ """Async context manager exit."""
97
+ await self.close()
98
+
99
+ async def _ensure_session(self):
100
+ """Ensure HTTP session is created."""
101
+ if self._session is None or self._session.closed:
102
+ # Setup headers
103
+ headers = get_realistic_headers()
104
+ if self.api_key:
105
+ headers["Authorization"] = f"Bearer {self.api_key}"
106
+
107
+ # Create timeout configuration
108
+ timeout = ClientTimeout(total=self.timeout)
109
+
110
+ # Create session
111
+ connector = aiohttp.TCPConnector(
112
+ verify_ssl=self.verify_ssl,
113
+ limit=self.max_concurrent * 2
114
+ )
115
+
116
+ self._session = ClientSession(
117
+ headers=headers,
118
+ timeout=timeout,
119
+ connector=connector
120
+ )
121
+
122
+ async def generate_speech(
123
+ self,
124
+ text: str,
125
+ voice: Union[Voice, str] = Voice.ALLOY,
126
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
127
+ instructions: Optional[str] = None,
128
+ max_length: int = 4096,
129
+ validate_length: bool = True,
130
+ **kwargs
131
+ ) -> TTSResponse:
132
+ """
133
+ Generate speech from text asynchronously.
134
+
135
+ Args:
136
+ text: Text to convert to speech
137
+ voice: Voice to use for generation
138
+ response_format: Audio format for output
139
+ instructions: Optional instructions for voice modulation
140
+ max_length: Maximum allowed text length in characters (default: 4096)
141
+ validate_length: Whether to validate text length (default: True)
142
+ **kwargs: Additional parameters
143
+
144
+ Returns:
145
+ TTSResponse: Generated audio response
146
+
147
+ Raises:
148
+ TTSException: If generation fails
149
+ ValueError: If text exceeds max_length and validate_length is True
150
+ """
151
+ # Create and validate request
152
+ request = TTSRequest(
153
+ input=sanitize_text(text),
154
+ voice=voice,
155
+ response_format=response_format,
156
+ instructions=instructions,
157
+ max_length=max_length,
158
+ validate_length=validate_length,
159
+ **kwargs
160
+ )
161
+
162
+ return await self._make_request(request)
163
+
164
+ async def generate_speech_long_text(
165
+ self,
166
+ text: str,
167
+ voice: Union[Voice, str] = Voice.ALLOY,
168
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
169
+ instructions: Optional[str] = None,
170
+ max_length: int = 4096,
171
+ preserve_words: bool = True,
172
+ **kwargs
173
+ ) -> List[TTSResponse]:
174
+ """
175
+ Generate speech from long text by splitting it into chunks asynchronously.
176
+
177
+ This method automatically splits text that exceeds max_length into
178
+ smaller chunks and generates speech for each chunk concurrently.
179
+
180
+ Args:
181
+ text: Text to convert to speech
182
+ voice: Voice to use for generation
183
+ response_format: Audio format for output
184
+ instructions: Optional instructions for voice modulation
185
+ max_length: Maximum length per chunk (default: 4096)
186
+ preserve_words: Whether to avoid splitting words (default: True)
187
+ **kwargs: Additional parameters
188
+
189
+ Returns:
190
+ List[TTSResponse]: List of generated audio responses
191
+
192
+ Raises:
193
+ TTSException: If generation fails for any chunk
194
+ """
195
+ # Sanitize text first
196
+ clean_text = sanitize_text(text)
197
+
198
+ # Split text into chunks
199
+ chunks = split_text_by_length(clean_text, max_length, preserve_words)
200
+
201
+ if not chunks:
202
+ raise ValueError("No valid text chunks found after processing")
203
+
204
+ # Create requests for all chunks
205
+ requests = []
206
+ for chunk in chunks:
207
+ request = TTSRequest(
208
+ input=chunk,
209
+ voice=voice,
210
+ response_format=response_format,
211
+ instructions=instructions,
212
+ max_length=max_length,
213
+ validate_length=False, # We already split the text
214
+ **kwargs
215
+ )
216
+ requests.append(request)
217
+
218
+ # Process all chunks concurrently
219
+ return await self.generate_speech_batch(requests)
220
+
221
+ async def generate_speech_batch(
222
+ self,
223
+ requests: List[TTSRequest]
224
+ ) -> List[TTSResponse]:
225
+ """
226
+ Generate speech for multiple requests concurrently.
227
+
228
+ Args:
229
+ requests: List of TTS requests
230
+
231
+ Returns:
232
+ List[TTSResponse]: List of generated audio responses
233
+
234
+ Raises:
235
+ TTSException: If any generation fails
236
+ """
237
+ if not requests:
238
+ return []
239
+
240
+ # Process requests concurrently with semaphore limiting
241
+ tasks = [self._make_request(request) for request in requests]
242
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
243
+
244
+ # Check for exceptions and convert them
245
+ results = []
246
+ for i, response in enumerate(responses):
247
+ if isinstance(response, Exception):
248
+ raise TTSException(f"Request {i} failed: {str(response)}")
249
+ results.append(response)
250
+
251
+ return results
252
+
253
+ async def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
254
+ """
255
+ Generate speech from a TTSRequest object asynchronously.
256
+
257
+ Args:
258
+ request: TTS request object
259
+
260
+ Returns:
261
+ TTSResponse: Generated audio response
262
+ """
263
+ return await self._make_request(request)
264
+
265
+ async def _make_request(self, request: TTSRequest) -> TTSResponse:
266
+ """
267
+ Make the actual HTTP request to the TTS service.
268
+
269
+ Args:
270
+ request: TTS request object
271
+
272
+ Returns:
273
+ TTSResponse: Generated audio response
274
+
275
+ Raises:
276
+ TTSException: If request fails
277
+ """
278
+ await self._ensure_session()
279
+
280
+ async with self._semaphore: # Limit concurrent requests
281
+ url = build_url(self.base_url, "api/generate")
282
+
283
+ # Prepare form data for openai.fm API
284
+ form_data = {
285
+ 'input': request.input,
286
+ 'voice': request.voice.value,
287
+ 'generation': str(uuid.uuid4()),
288
+ 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
289
+ }
290
+
291
+ # Add prompt/instructions if provided
292
+ if request.instructions:
293
+ form_data['prompt'] = request.instructions
294
+ else:
295
+ # Default prompt for better quality
296
+ form_data['prompt'] = (
297
+ "Affect/personality: Natural and clear\n\n"
298
+ "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
299
+ "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
300
+ "while maintaining a natural, conversational flow.\n\n"
301
+ "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
302
+ "to process the information.\n\n"
303
+ "Emotion: Warm and engaging, conveying the intended message effectively."
304
+ )
305
+
306
+ logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
307
+
308
+ # Make request with retries
309
+ for attempt in range(self.max_retries + 1):
310
+ try:
311
+ # Add random delay for rate limiting (except first attempt)
312
+ if attempt > 0:
313
+ delay = exponential_backoff(attempt - 1)
314
+ logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
315
+ await asyncio.sleep(delay)
316
+
317
+ # Use form data as required by openai.fm
318
+ async with self._session.post(url, data=form_data) as response:
319
+ # Handle different response types
320
+ if response.status == 200:
321
+ return await self._process_openai_fm_response(response, request)
322
+ else:
323
+ # Try to parse error response
324
+ try:
325
+ error_data = await response.json()
326
+ except (json.JSONDecodeError, ValueError):
327
+ text = await response.text()
328
+ error_data = {"error": {"message": text or "Unknown error"}}
329
+
330
+ # Create appropriate exception
331
+ exception = create_exception_from_response(
332
+ response.status,
333
+ error_data,
334
+ f"TTS request failed with status {response.status}"
335
+ )
336
+
337
+ # Don't retry for certain errors
338
+ if response.status in [400, 401, 403, 404]:
339
+ raise exception
340
+
341
+ # For retryable errors, continue to next attempt
342
+ if attempt == self.max_retries:
343
+ raise exception
344
+
345
+ logger.warning(f"Request failed with status {response.status}, retrying...")
346
+ continue
347
+
348
+ except asyncio.TimeoutError:
349
+ if attempt == self.max_retries:
350
+ raise NetworkException(
351
+ f"Request timed out after {self.timeout}s",
352
+ timeout=self.timeout,
353
+ retry_count=attempt
354
+ )
355
+ logger.warning(f"Request timed out, retrying...")
356
+ continue
357
+
358
+ except aiohttp.ClientError as e:
359
+ if attempt == self.max_retries:
360
+ raise NetworkException(
361
+ f"Client error: {str(e)}",
362
+ retry_count=attempt
363
+ )
364
+ logger.warning(f"Client error, retrying...")
365
+ continue
366
+
367
+ # This should never be reached, but just in case
368
+ raise TTSException("Maximum retries exceeded")
369
+
370
+ async def _process_openai_fm_response(
371
+ self,
372
+ response: aiohttp.ClientResponse,
373
+ request: TTSRequest
374
+ ) -> TTSResponse:
375
+ """
376
+ Process a successful response from the openai.fm TTS service.
377
+
378
+ Args:
379
+ response: HTTP response object
380
+ request: Original TTS request
381
+
382
+ Returns:
383
+ TTSResponse: Processed response object
384
+ """
385
+ # Get content type from response headers
386
+ content_type = response.headers.get("content-type", "audio/mpeg")
387
+
388
+ # Get audio data
389
+ audio_data = await response.read()
390
+
391
+ if not audio_data:
392
+ raise APIException("Received empty audio data from openai.fm")
393
+
394
+ # Determine format from content type
395
+ if "audio/mpeg" in content_type or "audio/mp3" in content_type:
396
+ actual_format = AudioFormat.MP3
397
+ elif "audio/wav" in content_type:
398
+ actual_format = AudioFormat.WAV
399
+ elif "audio/opus" in content_type:
400
+ actual_format = AudioFormat.OPUS
401
+ elif "audio/aac" in content_type:
402
+ actual_format = AudioFormat.AAC
403
+ elif "audio/flac" in content_type:
404
+ actual_format = AudioFormat.FLAC
405
+ else:
406
+ # Default to MP3 for openai.fm
407
+ actual_format = AudioFormat.MP3
408
+
409
+ # Estimate duration based on text length
410
+ estimated_duration = estimate_audio_duration(request.input)
411
+
412
+ # Check if returned format differs from requested format
413
+ requested_format = request.response_format
414
+ if isinstance(requested_format, str):
415
+ try:
416
+ requested_format = AudioFormat(requested_format.lower())
417
+ except ValueError:
418
+ requested_format = AudioFormat.MP3 # Default fallback
419
+
420
+ # Import here to avoid circular imports
421
+ from .models import maps_to_wav
422
+
423
+ # Check if format differs from request
424
+ if actual_format != requested_format:
425
+ if maps_to_wav(requested_format.value) and actual_format.value == "wav":
426
+ logger.debug(
427
+ f"Format '{requested_format.value}' requested, returning WAV format."
428
+ )
429
+ else:
430
+ logger.warning(
431
+ f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
432
+ f"from service."
433
+ )
434
+
435
+ # Create response object
436
+ tts_response = TTSResponse(
437
+ audio_data=audio_data,
438
+ content_type=content_type,
439
+ format=actual_format,
440
+ size=len(audio_data),
441
+ duration=estimated_duration,
442
+ metadata={
443
+ "response_headers": dict(response.headers),
444
+ "status_code": response.status,
445
+ "url": str(response.url),
446
+ "service": "openai.fm",
447
+ "voice": request.voice.value,
448
+ "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
449
+ "requested_format": requested_format.value,
450
+ "actual_format": actual_format.value
451
+ }
452
+ )
453
+
454
+ logger.info(
455
+ f"Successfully generated {format_file_size(len(audio_data))} "
456
+ f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
457
+ )
458
+
459
+ return tts_response
460
+
461
+ async def close(self):
462
+ """Close the HTTP session."""
463
+ if self._session and not self._session.closed:
464
+ await self._session.close()
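A minimal usage sketch for the async client above, assuming it is exported as `AsyncTTSClient` from `ttsfm.async_client` and that its constructor mirrors the synchronous `TTSClient` (neither name appears in this hunk); `TTSRequest`, `generate_speech_from_request`, `close` and `save_to_file` come from the diff itself.

```python
import asyncio

# Assumed import path and class name -- not shown in this hunk.
from ttsfm.async_client import AsyncTTSClient
from ttsfm.models import TTSRequest, Voice, AudioFormat


async def demo() -> None:
    # Constructor arguments assumed to mirror the synchronous TTSClient.
    client = AsyncTTSClient(base_url="https://www.openai.fm", timeout=30.0, max_retries=3)
    try:
        request = TTSRequest(
            input="Hello from the async client!",
            voice=Voice.NOVA,
            response_format=AudioFormat.MP3,
        )
        response = await client.generate_speech_from_request(request)
        # save_to_file() appends the extension of the format actually returned.
        response.save_to_file("hello_async")
    finally:
        await client.close()


asyncio.run(demo())
```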
ttsfm/cli.py ADDED
@@ -0,0 +1,362 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Command-line interface for TTSFM.
4
+
5
+ This module provides a command-line interface for the TTSFM package,
6
+ allowing users to generate speech from text using various options.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from typing import Optional
13
+ from pathlib import Path
14
+
15
+ from .client import TTSClient
16
+ from .models import Voice, AudioFormat
17
+ from .exceptions import TTSException, APIException, NetworkException
18
+
19
+
20
+ def create_parser() -> argparse.ArgumentParser:
21
+ """Create and configure the argument parser."""
22
+ parser = argparse.ArgumentParser(
23
+ prog="ttsfm",
24
+ description="TTSFM - Text-to-Speech API Client",
25
+ formatter_class=argparse.RawDescriptionHelpFormatter,
26
+ epilog="""
27
+ Examples:
28
+ ttsfm "Hello, world!" --output hello.mp3
29
+ ttsfm "Hello, world!" --voice nova --format wav --output hello.wav
30
+ ttsfm "Hello, world!" --url http://localhost:7000 --output hello.mp3
31
+ ttsfm --text-file input.txt --output speech.mp3
32
+ """
33
+ )
34
+
35
+ # Text input options (mutually exclusive)
36
+ text_group = parser.add_mutually_exclusive_group(required=True)
37
+ text_group.add_argument(
38
+ "text",
39
+ nargs="?",
40
+ help="Text to convert to speech"
41
+ )
42
+ text_group.add_argument(
43
+ "--text-file", "-f",
44
+ type=str,
45
+ help="Read text from file"
46
+ )
47
+
48
+ # Output options
49
+ parser.add_argument(
50
+ "--output", "-o",
51
+ type=str,
52
+ required=True,
53
+ help="Output file path"
54
+ )
55
+
56
+ # TTS options
57
+ parser.add_argument(
58
+ "--voice", "-v",
59
+ type=str,
60
+ default="alloy",
61
+ choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
62
+ help="Voice to use for speech generation (default: alloy)"
63
+ )
64
+
65
+ parser.add_argument(
66
+ "--format",
67
+ type=str,
68
+ default="mp3",
69
+ choices=["mp3", "opus", "aac", "flac", "wav", "pcm"],
70
+ help="Audio format (default: mp3)"
71
+ )
72
+
73
+ parser.add_argument(
74
+ "--speed",
75
+ type=float,
76
+ default=1.0,
77
+ help="Speech speed (0.25 to 4.0, default: 1.0)"
78
+ )
79
+
80
+ # Client options
81
+ parser.add_argument(
82
+ "--url", "-u",
83
+ type=str,
84
+ default="http://localhost:7000",
85
+ help="TTS service URL (default: http://localhost:7000)"
86
+ )
87
+
88
+ parser.add_argument(
89
+ "--api-key", "-k",
90
+ type=str,
91
+ help="API key for authentication"
92
+ )
93
+
94
+ parser.add_argument(
95
+ "--timeout",
96
+ type=float,
97
+ default=30.0,
98
+ help="Request timeout in seconds (default: 30.0)"
99
+ )
100
+
101
+ parser.add_argument(
102
+ "--retries",
103
+ type=int,
104
+ default=3,
105
+ help="Maximum number of retries (default: 3)"
106
+ )
107
+
108
+ # Text length validation options
109
+ parser.add_argument(
110
+ "--max-length",
111
+ type=int,
112
+ default=4096,
113
+ help="Maximum text length in characters (default: 4096)"
114
+ )
115
+
116
+ parser.add_argument(
117
+ "--no-length-validation",
118
+ action="store_true",
119
+ help="Disable text length validation"
120
+ )
121
+
122
+ parser.add_argument(
123
+ "--split-long-text",
124
+ action="store_true",
125
+ help="Automatically split long text into chunks"
126
+ )
127
+
128
+ # Other options
129
+ parser.add_argument(
130
+ "--verbose", "-V",
131
+ action="store_true",
132
+ help="Enable verbose output"
133
+ )
134
+
135
+ parser.add_argument(
136
+ "--version",
137
+ action="version",
138
+ version=f"%(prog)s {get_version()}"
139
+ )
140
+
141
+ return parser
142
+
143
+
144
+ def get_version() -> str:
145
+ """Get the package version."""
146
+ try:
147
+ from . import __version__
148
+ return __version__
149
+ except ImportError:
150
+ return "unknown"
151
+
152
+
153
+ def read_text_file(file_path: str) -> str:
154
+ """Read text from a file."""
155
+ try:
156
+ with open(file_path, 'r', encoding='utf-8') as f:
157
+ return f.read().strip()
158
+ except FileNotFoundError:
159
+ print(f"Error: File '{file_path}' not found.", file=sys.stderr)
160
+ sys.exit(1)
161
+ except Exception as e:
162
+ print(f"Error reading file '{file_path}': {e}", file=sys.stderr)
163
+ sys.exit(1)
164
+
165
+
166
+ def validate_speed(speed: float) -> float:
167
+ """Validate and return the speed parameter."""
168
+ if not 0.25 <= speed <= 4.0:
169
+ print("Error: Speed must be between 0.25 and 4.0", file=sys.stderr)
170
+ sys.exit(1)
171
+ return speed
172
+
173
+
174
+ def get_voice_enum(voice_str: str) -> Voice:
175
+ """Convert voice string to Voice enum."""
176
+ voice_map = {
177
+ "alloy": Voice.ALLOY,
178
+ "echo": Voice.ECHO,
179
+ "fable": Voice.FABLE,
180
+ "onyx": Voice.ONYX,
181
+ "nova": Voice.NOVA,
182
+ "shimmer": Voice.SHIMMER,
183
+ }
184
+ return voice_map[voice_str.lower()]
185
+
186
+
187
+ def get_format_enum(format_str: str) -> AudioFormat:
188
+ """Convert format string to AudioFormat enum."""
189
+ format_map = {
190
+ "mp3": AudioFormat.MP3,
191
+ "opus": AudioFormat.OPUS,
192
+ "aac": AudioFormat.AAC,
193
+ "flac": AudioFormat.FLAC,
194
+ "wav": AudioFormat.WAV,
195
+ "pcm": AudioFormat.PCM,
196
+ }
197
+ return format_map[format_str.lower()]
198
+
199
+
200
+ def handle_long_text(args, text: str, voice: Voice, audio_format: AudioFormat, speed: float) -> None:
201
+ """Handle long text by splitting it into chunks and generating multiple files."""
202
+ from .utils import split_text_by_length
203
+ import os
204
+
205
+ # Split text into chunks
206
+ chunks = split_text_by_length(text, args.max_length, preserve_words=True)
207
+
208
+ if not chunks:
209
+ print("Error: No valid text chunks found after processing.", file=sys.stderr)
210
+ sys.exit(1)
211
+
212
+ print(f"Split text into {len(chunks)} chunks")
213
+
214
+ # Create client
215
+ try:
216
+ client = TTSClient(
217
+ base_url=args.url,
218
+ api_key=args.api_key,
219
+ timeout=args.timeout,
220
+ max_retries=args.retries
221
+ )
222
+
223
+ # Generate speech for each chunk
224
+ base_name, ext = os.path.splitext(args.output)
225
+
226
+ for i, chunk in enumerate(chunks, 1):
227
+ if args.verbose:
228
+ print(f"Processing chunk {i}/{len(chunks)} ({len(chunk)} characters)...")
229
+
230
+ # Generate filename for this chunk
231
+ if len(chunks) == 1:
232
+ output_file = args.output
233
+ else:
234
+ output_file = f"{base_name}_part{i:03d}{ext}"
235
+
236
+ # Generate speech for this chunk
237
+ response = client.generate_speech(
238
+ text=chunk,
239
+ voice=voice,
240
+ response_format=audio_format,
241
+ speed=speed,
242
+ max_length=args.max_length,
243
+ validate_length=False # We already split the text
244
+ )
245
+
246
+ # Save to file
247
+ with open(output_file, 'wb') as f:
248
+ f.write(response.audio_data)
249
+
250
+ print(f"Generated: {output_file}")
251
+
252
+ if len(chunks) > 1:
253
+ print(f"\nGenerated {len(chunks)} audio files from long text.")
254
+ print(f"Files: {base_name}_part001{ext} to {base_name}_part{len(chunks):03d}{ext}")
255
+
256
+ except Exception as e:
257
+ print(f"Error processing long text: {e}", file=sys.stderr)
258
+ if args.verbose:
259
+ import traceback
260
+ traceback.print_exc()
261
+ sys.exit(1)
262
+
263
+
264
+ def main() -> None:
265
+ """Main CLI entry point."""
266
+ parser = create_parser()
267
+ args = parser.parse_args()
268
+
269
+ # Get text input
270
+ if args.text:
271
+ text = args.text
272
+ else:
273
+ text = read_text_file(args.text_file)
274
+
275
+ if not text:
276
+ print("Error: No text provided.", file=sys.stderr)
277
+ sys.exit(1)
278
+
279
+ # Validate parameters
280
+ speed = validate_speed(args.speed)
281
+ voice = get_voice_enum(args.voice)
282
+ audio_format = get_format_enum(args.format)
283
+
284
+ # Create output directory if needed
285
+ output_path = Path(args.output)
286
+ output_path.parent.mkdir(parents=True, exist_ok=True)
287
+
288
+ # Check text length and handle accordingly
289
+ text_length = len(text)
290
+ validate_length = not args.no_length_validation
291
+
292
+ if args.verbose:
293
+ print(f"Text: {text[:50]}{'...' if len(text) > 50 else ''}")
294
+ print(f"Text length: {text_length} characters")
295
+ print(f"Max length: {args.max_length}")
296
+ print(f"Length validation: {'enabled' if validate_length else 'disabled'}")
297
+ print(f"Voice: {args.voice}")
298
+ print(f"Format: {args.format}")
299
+ print(f"Speed: {speed}")
300
+ print(f"URL: {args.url}")
301
+ print(f"Output: {args.output}")
302
+ print()
303
+
304
+ # Handle long text
305
+ if text_length > args.max_length:
306
+ if args.split_long_text:
307
+ print(f"Text is {text_length} characters, splitting into chunks...")
308
+ return handle_long_text(args, text, voice, audio_format, speed)
309
+ elif validate_length:
310
+ print(f"Error: Text is too long ({text_length} characters). "
311
+ f"Maximum allowed is {args.max_length} characters.", file=sys.stderr)
312
+ print("Use --split-long-text to automatically split the text, "
313
+ "or --no-length-validation to disable this check.", file=sys.stderr)
314
+ sys.exit(1)
315
+
316
+ # Create client
317
+ try:
318
+ client = TTSClient(
319
+ base_url=args.url,
320
+ api_key=args.api_key,
321
+ timeout=args.timeout,
322
+ max_retries=args.retries
323
+ )
324
+
325
+ if args.verbose:
326
+ print("Generating speech...")
327
+
328
+ # Generate speech
329
+ response = client.generate_speech(
330
+ text=text,
331
+ voice=voice,
332
+ response_format=audio_format,
333
+ speed=speed,
334
+ max_length=args.max_length,
335
+ validate_length=validate_length
336
+ )
337
+
338
+ # Save to file
339
+ with open(args.output, 'wb') as f:
340
+ f.write(response.audio_data)
341
+
342
+ print(f"Speech generated successfully: {args.output}")
343
+
344
+ except NetworkException as e:
345
+ print(f"Network error: {e}", file=sys.stderr)
346
+ sys.exit(1)
347
+ except APIException as e:
348
+ print(f"API error: {e}", file=sys.stderr)
349
+ sys.exit(1)
350
+ except TTSException as e:
351
+ print(f"TTS error: {e}", file=sys.stderr)
352
+ sys.exit(1)
353
+ except Exception as e:
354
+ print(f"Unexpected error: {e}", file=sys.stderr)
355
+ if args.verbose:
356
+ import traceback
357
+ traceback.print_exc()
358
+ sys.exit(1)
359
+
360
+
361
+ if __name__ == "__main__":
362
+ main()
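The CLI can also be driven programmatically; a small sketch using only `create_parser` and `main` from the file above. Note that calling `main()` performs a real request against `--url` (default `http://localhost:7000`) and may call `sys.exit()` on failure.

```python
import sys

from ttsfm.cli import create_parser, main

# Inspect parsed options without touching the network.
parser = create_parser()
args = parser.parse_args(["Hello, world!", "--voice", "nova", "--output", "hello.mp3"])
print(args.voice, args.format, args.output)  # nova mp3 hello.mp3

# Equivalent to: ttsfm "Hello, world!" --voice nova --output hello.mp3
# (performs a real request and exits non-zero on error)
sys.argv = ["ttsfm", "Hello, world!", "--voice", "nova", "--output", "hello.mp3"]
main()
```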
ttsfm/client.py ADDED
@@ -0,0 +1,481 @@
1
+ """
2
+ Main TTS client implementation.
3
+
4
+ This module provides the primary TTSClient class for synchronous
5
+ text-to-speech generation with OpenAI-compatible API.
6
+ """
7
+
8
+ import json
9
+ import time
10
+ import uuid
11
+ import logging
12
+ from typing import Optional, Dict, Any, Union, List
13
+ from urllib.parse import urljoin
14
+
15
+ import requests
16
+ from requests.adapters import HTTPAdapter
17
+ from urllib3.util.retry import Retry
18
+
19
+ from .models import (
20
+ TTSRequest, TTSResponse, Voice, AudioFormat,
21
+ get_content_type, get_format_from_content_type
22
+ )
23
+ from .exceptions import (
24
+ TTSException, APIException, NetworkException, ValidationException,
25
+ create_exception_from_response
26
+ )
27
+ from .utils import (
28
+ get_realistic_headers, sanitize_text, validate_url, build_url,
29
+ exponential_backoff, estimate_audio_duration, format_file_size,
30
+ validate_text_length, split_text_by_length
31
+ )
32
+
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class TTSClient:
38
+ """
39
+ Synchronous TTS client for text-to-speech generation.
40
+
41
+ This client provides a simple interface for generating speech from text
42
+ using OpenAI-compatible TTS services.
43
+
44
+ Attributes:
45
+ base_url: Base URL for the TTS service
46
+ api_key: API key for authentication (if required)
47
+ timeout: Request timeout in seconds
48
+ max_retries: Maximum number of retry attempts
49
+ verify_ssl: Whether to verify SSL certificates
50
+ """
51
+
52
+ def __init__(
53
+ self,
54
+ base_url: str = "https://www.openai.fm",
55
+ api_key: Optional[str] = None,
56
+ timeout: float = 30.0,
57
+ max_retries: int = 3,
58
+ verify_ssl: bool = True,
59
+ preferred_format: Optional[AudioFormat] = None,
60
+ **kwargs
61
+ ):
62
+ """
63
+ Initialize the TTS client.
64
+
65
+ Args:
66
+ base_url: Base URL for the TTS service
67
+ api_key: API key for authentication
68
+ timeout: Request timeout in seconds
69
+ max_retries: Maximum retry attempts
70
+ verify_ssl: Whether to verify SSL certificates
71
+ preferred_format: Preferred audio format (affects header selection)
72
+ **kwargs: Additional configuration options
73
+ """
74
+ self.base_url = base_url.rstrip('/')
75
+ self.api_key = api_key
76
+ self.timeout = timeout
77
+ self.max_retries = max_retries
78
+ self.verify_ssl = verify_ssl
79
+ self.preferred_format = preferred_format or AudioFormat.WAV
80
+
81
+ # Validate base URL
82
+ if not validate_url(self.base_url):
83
+ raise ValidationException(f"Invalid base URL: {self.base_url}")
84
+
85
+ # Setup HTTP session with retry strategy
86
+ self.session = requests.Session()
87
+
88
+ # Configure retry strategy
89
+ retry_strategy = Retry(
90
+ total=max_retries,
91
+ status_forcelist=[429, 500, 502, 503, 504],
92
+ allowed_methods=["HEAD", "GET", "POST"], # Updated parameter name
93
+ backoff_factor=1
94
+ )
95
+
96
+ adapter = HTTPAdapter(max_retries=retry_strategy)
97
+ self.session.mount("http://", adapter)
98
+ self.session.mount("https://", adapter)
99
+
100
+ # Set default headers
101
+ self.session.headers.update(get_realistic_headers())
102
+
103
+ if self.api_key:
104
+ self.session.headers["Authorization"] = f"Bearer {self.api_key}"
105
+
106
+ logger.info(f"Initialized TTS client with base URL: {self.base_url}")
107
+
108
+ def _get_headers_for_format(self, requested_format: AudioFormat) -> Dict[str, str]:
109
+ """
110
+ Get appropriate headers to get the desired format from openai.fm.
111
+
112
+ Based on testing, openai.fm returns:
113
+ - MP3: When using simple/minimal headers
114
+ - WAV: When using full Chrome security headers
115
+
116
+ Args:
117
+ requested_format: The desired audio format
118
+
119
+ Returns:
120
+ Dict[str, str]: HTTP headers optimized for the requested format
121
+ """
122
+ from .models import get_supported_format
123
+
124
+ # Map requested format to supported format
125
+ target_format = get_supported_format(requested_format)
126
+
127
+ if target_format == AudioFormat.MP3:
128
+ # Use minimal headers to get MP3 response
129
+ return {
130
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
131
+ 'Accept': 'audio/*,*/*;q=0.9'
132
+ }
133
+ else:
134
+ # Use full realistic headers to get WAV response
135
+ # This works for WAV, OPUS, AAC, FLAC, PCM formats
136
+ return get_realistic_headers()
137
+
138
+ def generate_speech(
139
+ self,
140
+ text: str,
141
+ voice: Union[Voice, str] = Voice.ALLOY,
142
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
143
+ instructions: Optional[str] = None,
144
+ max_length: int = 4096,
145
+ validate_length: bool = True,
146
+ **kwargs
147
+ ) -> TTSResponse:
148
+ """
149
+ Generate speech from text.
150
+
151
+ Args:
152
+ text: Text to convert to speech
153
+ voice: Voice to use for generation
154
+ response_format: Audio format for output
155
+ instructions: Optional instructions for voice modulation
156
+ max_length: Maximum allowed text length in characters (default: 4096)
157
+ validate_length: Whether to validate text length (default: True)
158
+ **kwargs: Additional parameters
159
+
160
+ Returns:
161
+ TTSResponse: Generated audio response
162
+
163
+ Raises:
164
+ TTSException: If generation fails
165
+ ValueError: If text exceeds max_length and validate_length is True
166
+ """
167
+ # Create and validate request
168
+ request = TTSRequest(
169
+ input=sanitize_text(text),
170
+ voice=voice,
171
+ response_format=response_format,
172
+ instructions=instructions,
173
+ max_length=max_length,
174
+ validate_length=validate_length,
175
+ **kwargs
176
+ )
177
+
178
+ return self._make_request(request)
179
+
180
+ def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
181
+ """
182
+ Generate speech from a TTSRequest object.
183
+
184
+ Args:
185
+ request: TTS request object
186
+
187
+ Returns:
188
+ TTSResponse: Generated audio response
189
+ """
190
+ return self._make_request(request)
191
+
192
+ def generate_speech_batch(
193
+ self,
194
+ text: str,
195
+ voice: Union[Voice, str] = Voice.ALLOY,
196
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
197
+ instructions: Optional[str] = None,
198
+ max_length: int = 4096,
199
+ preserve_words: bool = True,
200
+ **kwargs
201
+ ) -> List[TTSResponse]:
202
+ """
203
+ Generate speech from long text by splitting it into chunks.
204
+
205
+ This method automatically splits text that exceeds max_length into
206
+ smaller chunks and generates speech for each chunk separately.
207
+
208
+ Args:
209
+ text: Text to convert to speech
210
+ voice: Voice to use for generation
211
+ response_format: Audio format for output
212
+ instructions: Optional instructions for voice modulation
213
+ max_length: Maximum length per chunk (default: 4096)
214
+ preserve_words: Whether to avoid splitting words (default: True)
215
+ **kwargs: Additional parameters
216
+
217
+ Returns:
218
+ List[TTSResponse]: List of generated audio responses
219
+
220
+ Raises:
221
+ TTSException: If generation fails for any chunk
222
+ """
223
+
224
+ # Sanitize text first
225
+ clean_text = sanitize_text(text)
226
+
227
+ # Split text into chunks
228
+ chunks = split_text_by_length(clean_text, max_length, preserve_words)
229
+
230
+ if not chunks:
231
+ raise ValueError("No valid text chunks found after processing")
232
+
233
+ responses = []
234
+
235
+ for i, chunk in enumerate(chunks):
236
+ logger.info(f"Processing chunk {i+1}/{len(chunks)} ({len(chunk)} characters)")
237
+
238
+ # Create request for this chunk (disable length validation since we already split)
239
+ request = TTSRequest(
240
+ input=chunk,
241
+ voice=voice,
242
+ response_format=response_format,
243
+ instructions=instructions,
244
+ max_length=max_length,
245
+ validate_length=False, # We already split the text
246
+ **kwargs
247
+ )
248
+
249
+ response = self._make_request(request)
250
+ responses.append(response)
251
+
252
+ return responses
253
+
254
+ def _make_request(self, request: TTSRequest) -> TTSResponse:
255
+ """
256
+ Make the actual HTTP request to the openai.fm TTS service.
257
+
258
+ Args:
259
+ request: TTS request object
260
+
261
+ Returns:
262
+ TTSResponse: Generated audio response
263
+
264
+ Raises:
265
+ TTSException: If request fails
266
+ """
267
+ url = build_url(self.base_url, "api/generate")
268
+
269
+ # Prepare form data for openai.fm API
270
+ form_data = {
271
+ 'input': request.input,
272
+ 'voice': request.voice.value,
273
+ 'generation': str(uuid.uuid4()),
274
+ 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
275
+ }
276
+
277
+ # Add prompt/instructions if provided
278
+ if request.instructions:
279
+ form_data['prompt'] = request.instructions
280
+ else:
281
+ # Default prompt for better quality
282
+ form_data['prompt'] = (
283
+ "Affect/personality: Natural and clear\n\n"
284
+ "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
285
+ "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
286
+ "while maintaining a natural, conversational flow.\n\n"
287
+ "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
288
+ "to process the information.\n\n"
289
+ "Emotion: Warm and engaging, conveying the intended message effectively."
290
+ )
291
+
292
+ # Get optimized headers for the requested format
293
+ # Convert string format to AudioFormat enum if needed
294
+ requested_format = request.response_format
295
+ if isinstance(requested_format, str):
296
+ try:
297
+ requested_format = AudioFormat(requested_format.lower())
298
+ except ValueError:
299
+ requested_format = AudioFormat.WAV # Default to WAV for unknown formats
300
+
301
+ format_headers = self._get_headers_for_format(requested_format)
302
+
303
+ logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
304
+ logger.debug(f"Using headers optimized for {requested_format.value} format")
305
+
306
+ # Make request with retries
307
+ for attempt in range(self.max_retries + 1):
308
+ try:
309
+ # Add random delay for rate limiting (except first attempt)
310
+ if attempt > 0:
311
+ delay = exponential_backoff(attempt - 1)
312
+ logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
313
+ time.sleep(delay)
314
+
315
+ # Use multipart form data as required by openai.fm
316
+ response = self.session.post(
317
+ url,
318
+ data=form_data,
319
+ headers=format_headers,
320
+ timeout=self.timeout,
321
+ verify=self.verify_ssl
322
+ )
323
+
324
+ # Handle different response types
325
+ if response.status_code == 200:
326
+ return self._process_openai_fm_response(response, request)
327
+ else:
328
+ # Try to parse error response
329
+ try:
330
+ error_data = response.json()
331
+ except (json.JSONDecodeError, ValueError):
332
+ error_data = {"error": {"message": response.text or "Unknown error"}}
333
+
334
+ # Create appropriate exception
335
+ exception = create_exception_from_response(
336
+ response.status_code,
337
+ error_data,
338
+ f"TTS request failed with status {response.status_code}"
339
+ )
340
+
341
+ # Don't retry for certain errors
342
+ if response.status_code in [400, 401, 403, 404]:
343
+ raise exception
344
+
345
+ # For retryable errors, continue to next attempt
346
+ if attempt == self.max_retries:
347
+ raise exception
348
+
349
+ logger.warning(f"Request failed with status {response.status_code}, retrying...")
350
+ continue
351
+
352
+ except requests.exceptions.Timeout:
353
+ if attempt == self.max_retries:
354
+ raise NetworkException(
355
+ f"Request timed out after {self.timeout}s",
356
+ timeout=self.timeout,
357
+ retry_count=attempt
358
+ )
359
+ logger.warning(f"Request timed out, retrying...")
360
+ continue
361
+
362
+ except requests.exceptions.ConnectionError as e:
363
+ if attempt == self.max_retries:
364
+ raise NetworkException(
365
+ f"Connection error: {str(e)}",
366
+ retry_count=attempt
367
+ )
368
+ logger.warning(f"Connection error, retrying...")
369
+ continue
370
+
371
+ except requests.exceptions.RequestException as e:
372
+ if attempt == self.max_retries:
373
+ raise NetworkException(
374
+ f"Request error: {str(e)}",
375
+ retry_count=attempt
376
+ )
377
+ logger.warning(f"Request error, retrying...")
378
+ continue
379
+
380
+ # This should never be reached, but just in case
381
+ raise TTSException("Maximum retries exceeded")
382
+
383
+ def _process_openai_fm_response(self, response: requests.Response, request: TTSRequest) -> TTSResponse:
384
+ """
385
+ Process a successful response from the openai.fm TTS service.
386
+
387
+ Args:
388
+ response: HTTP response object
389
+ request: Original TTS request
390
+
391
+ Returns:
392
+ TTSResponse: Processed response object
393
+ """
394
+ # Get content type from response headers
395
+ content_type = response.headers.get("content-type", "audio/mpeg")
396
+
397
+ # Get audio data
398
+ audio_data = response.content
399
+
400
+ if not audio_data:
401
+ raise APIException("Received empty audio data from openai.fm")
402
+
403
+ # Determine format from content type
404
+ if "audio/mpeg" in content_type or "audio/mp3" in content_type:
405
+ actual_format = AudioFormat.MP3
406
+ elif "audio/wav" in content_type:
407
+ actual_format = AudioFormat.WAV
408
+ elif "audio/opus" in content_type:
409
+ actual_format = AudioFormat.OPUS
410
+ elif "audio/aac" in content_type:
411
+ actual_format = AudioFormat.AAC
412
+ elif "audio/flac" in content_type:
413
+ actual_format = AudioFormat.FLAC
414
+ else:
415
+ # Default to MP3 for openai.fm
416
+ actual_format = AudioFormat.MP3
417
+
418
+ # Estimate duration based on text length (rough approximation)
419
+ estimated_duration = estimate_audio_duration(request.input)
420
+
421
+ # Check if returned format differs from requested format
422
+ requested_format = request.response_format
423
+ if isinstance(requested_format, str):
424
+ try:
425
+ requested_format = AudioFormat(requested_format.lower())
426
+ except ValueError:
427
+ requested_format = AudioFormat.WAV # Default fallback
428
+
429
+ # Import here to avoid circular imports
430
+ from .models import get_supported_format, maps_to_wav
431
+
432
+ # Check if format differs from request
433
+ if actual_format != requested_format:
434
+ if maps_to_wav(requested_format.value) and actual_format.value == "wav":
435
+ logger.debug(
436
+ f"Format '{requested_format.value}' requested, returning WAV format."
437
+ )
438
+ else:
439
+ logger.warning(
440
+ f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
441
+ f"from service."
442
+ )
443
+
444
+ # Create response object
445
+ tts_response = TTSResponse(
446
+ audio_data=audio_data,
447
+ content_type=content_type,
448
+ format=actual_format,
449
+ size=len(audio_data),
450
+ duration=estimated_duration,
451
+ metadata={
452
+ "response_headers": dict(response.headers),
453
+ "status_code": response.status_code,
454
+ "url": str(response.url),
455
+ "service": "openai.fm",
456
+ "voice": request.voice.value,
457
+ "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
458
+ "requested_format": requested_format.value,
459
+ "actual_format": actual_format.value
460
+ }
461
+ )
462
+
463
+ logger.info(
464
+ f"Successfully generated {format_file_size(len(audio_data))} "
465
+ f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
466
+ )
467
+
468
+ return tts_response
469
+
470
+ def close(self):
471
+ """Close the HTTP session."""
472
+ if hasattr(self, 'session'):
473
+ self.session.close()
474
+
475
+ def __enter__(self):
476
+ """Context manager entry."""
477
+ return self
478
+
479
+ def __exit__(self, exc_type, exc_val, exc_tb):
480
+ """Context manager exit."""
481
+ self.close()
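A minimal sketch of the synchronous client defined above, used as a context manager so the underlying `requests.Session` is closed; all names come from `ttsfm.client` and `ttsfm.models` in this diff.

```python
from ttsfm.client import TTSClient
from ttsfm.models import Voice, AudioFormat

with TTSClient(base_url="https://www.openai.fm", timeout=30.0, max_retries=3) as client:
    response = client.generate_speech(
        text="Hello from the synchronous client!",
        voice=Voice.ALLOY,
        response_format=AudioFormat.WAV,
    )
    # save_to_file() picks the extension from the format actually returned.
    saved = response.save_to_file("hello_sync")
    print(f"Wrote {response.size} bytes of {response.format.value} audio to {saved}")

# Long inputs can be split into chunks automatically:
with TTSClient() as client:
    long_text = "A fairly long article paragraph. " * 500  # well over max_length
    parts = client.generate_speech_batch(text=long_text, max_length=4096)
    for i, part in enumerate(parts, 1):
        part.save_to_file(f"chunk_{i:03d}")
```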
ttsfm/exceptions.py ADDED
@@ -0,0 +1,243 @@
1
+ """
2
+ Exception classes for the TTSFM package.
3
+
4
+ This module defines the exception hierarchy used throughout the package
5
+ for consistent error handling and reporting.
6
+ """
7
+
8
+ from typing import Optional, Dict, Any
9
+
10
+
11
+ class TTSException(Exception):
12
+ """
13
+ Base exception class for all TTSFM-related errors.
14
+
15
+ Attributes:
16
+ message: Human-readable error message
17
+ code: Error code for programmatic handling
18
+ details: Additional error details
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ message: str,
24
+ code: Optional[str] = None,
25
+ details: Optional[Dict[str, Any]] = None
26
+ ):
27
+ super().__init__(message)
28
+ self.message = message
29
+ self.code = code or self.__class__.__name__
30
+ self.details = details or {}
31
+
32
+ def __str__(self) -> str:
33
+ if self.code:
34
+ return f"[{self.code}] {self.message}"
35
+ return self.message
36
+
37
+ def __repr__(self) -> str:
38
+ return f"{self.__class__.__name__}(message='{self.message}', code='{self.code}')"
39
+
40
+
41
+ class APIException(TTSException):
42
+ """
43
+ Exception raised for API-related errors.
44
+
45
+ This includes HTTP errors, invalid responses, and server-side issues.
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ message: str,
51
+ status_code: Optional[int] = None,
52
+ response_data: Optional[Dict[str, Any]] = None,
53
+ **kwargs
54
+ ):
55
+ super().__init__(message, **kwargs)
56
+ self.status_code = status_code
57
+ self.response_data = response_data or {}
58
+
59
+ def __str__(self) -> str:
60
+ if self.status_code:
61
+ return f"[HTTP {self.status_code}] {self.message}"
62
+ return super().__str__()
63
+
64
+
65
+ class NetworkException(TTSException):
66
+ """
67
+ Exception raised for network-related errors.
68
+
69
+ This includes connection timeouts, DNS resolution failures, and other
70
+ network connectivity issues.
71
+ """
72
+
73
+ def __init__(
74
+ self,
75
+ message: str,
76
+ timeout: Optional[float] = None,
77
+ retry_count: int = 0,
78
+ **kwargs
79
+ ):
80
+ super().__init__(message, **kwargs)
81
+ self.timeout = timeout
82
+ self.retry_count = retry_count
83
+
84
+
85
+ class ValidationException(TTSException):
86
+ """
87
+ Exception raised for input validation errors.
88
+
89
+ This includes invalid parameters, missing required fields, and
90
+ data format issues.
91
+ """
92
+
93
+ def __init__(
94
+ self,
95
+ message: str,
96
+ field: Optional[str] = None,
97
+ value: Optional[Any] = None,
98
+ **kwargs
99
+ ):
100
+ super().__init__(message, **kwargs)
101
+ self.field = field
102
+ self.value = value
103
+
104
+ def __str__(self) -> str:
105
+ if self.field:
106
+ return f"Validation error for '{self.field}': {self.message}"
107
+ return f"Validation error: {self.message}"
108
+
109
+
110
+ class RateLimitException(APIException):
111
+ """
112
+ Exception raised when API rate limits are exceeded.
113
+
114
+ Attributes:
115
+ retry_after: Seconds to wait before retrying (if provided by server)
116
+ limit: Rate limit that was exceeded
117
+ remaining: Remaining requests in current window
118
+ """
119
+
120
+ def __init__(
121
+ self,
122
+ message: str = "Rate limit exceeded",
123
+ retry_after: Optional[int] = None,
124
+ limit: Optional[int] = None,
125
+ remaining: Optional[int] = None,
126
+ **kwargs
127
+ ):
128
+ super().__init__(message, status_code=429, **kwargs)
129
+ self.retry_after = retry_after
130
+ self.limit = limit
131
+ self.remaining = remaining
132
+
133
+ def __str__(self) -> str:
134
+ msg = super().__str__()
135
+ if self.retry_after:
136
+ msg += f" (retry after {self.retry_after}s)"
137
+ return msg
138
+
139
+
140
+ class AuthenticationException(APIException):
141
+ """
142
+ Exception raised for authentication and authorization errors.
143
+
144
+ This includes invalid API keys, expired tokens, and insufficient
145
+ permissions.
146
+ """
147
+
148
+ def __init__(
149
+ self,
150
+ message: str = "Authentication failed",
151
+ **kwargs
152
+ ):
153
+ super().__init__(message, status_code=401, **kwargs)
154
+
155
+
156
+ class ServiceUnavailableException(APIException):
157
+ """
158
+ Exception raised when the TTS service is temporarily unavailable.
159
+
160
+ This includes server maintenance, overload conditions, and
161
+ temporary service outages.
162
+ """
163
+
164
+ def __init__(
165
+ self,
166
+ message: str = "Service temporarily unavailable",
167
+ retry_after: Optional[int] = None,
168
+ **kwargs
169
+ ):
170
+ super().__init__(message, status_code=503, **kwargs)
171
+ self.retry_after = retry_after
172
+
173
+
174
+ class QuotaExceededException(APIException):
175
+ """
176
+ Exception raised when usage quotas are exceeded.
177
+
178
+ This includes monthly limits, character limits, and other
179
+ usage-based restrictions.
180
+ """
181
+
182
+ def __init__(
183
+ self,
184
+ message: str = "Usage quota exceeded",
185
+ quota_type: Optional[str] = None,
186
+ limit: Optional[int] = None,
187
+ used: Optional[int] = None,
188
+ **kwargs
189
+ ):
190
+ super().__init__(message, status_code=402, **kwargs)
191
+ self.quota_type = quota_type
192
+ self.limit = limit
193
+ self.used = used
194
+
195
+
196
+ class AudioProcessingException(TTSException):
197
+ """
198
+ Exception raised for audio processing errors.
199
+
200
+ This includes format conversion issues, audio generation failures,
201
+ and output processing problems.
202
+ """
203
+
204
+ def __init__(
205
+ self,
206
+ message: str,
207
+ audio_format: Optional[str] = None,
208
+ **kwargs
209
+ ):
210
+ super().__init__(message, **kwargs)
211
+ self.audio_format = audio_format
212
+
213
+
214
+ def create_exception_from_response(
215
+ status_code: int,
216
+ response_data: Dict[str, Any],
217
+ default_message: str = "API request failed"
218
+ ) -> APIException:
219
+ """
220
+ Create appropriate exception from API response.
221
+
222
+ Args:
223
+ status_code: HTTP status code
224
+ response_data: Response data from API
225
+ default_message: Default message if none in response
226
+
227
+ Returns:
228
+ APIException: Appropriate exception instance
229
+ """
230
+ message = response_data.get("error", {}).get("message", default_message)
231
+
232
+ if status_code == 401:
233
+ return AuthenticationException(message, response_data=response_data)
234
+ elif status_code == 402:
235
+ return QuotaExceededException(message, response_data=response_data)
236
+ elif status_code == 429:
237
+ retry_after = response_data.get("retry_after")
238
+ return RateLimitException(message, retry_after=retry_after, response_data=response_data)
239
+ elif status_code == 503:
240
+ retry_after = response_data.get("retry_after")
241
+ return ServiceUnavailableException(message, retry_after=retry_after, response_data=response_data)
242
+ else:
243
+ return APIException(message, status_code=status_code, response_data=response_data)
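A sketch of how callers are expected to consume the exception hierarchy above; the specific subclasses are caught before the base `TTSException`.

```python
from ttsfm.client import TTSClient
from ttsfm.exceptions import (
    RateLimitException,
    AuthenticationException,
    NetworkException,
    TTSException,
)

try:
    with TTSClient() as client:
        response = client.generate_speech(text="Hello!")
except RateLimitException as exc:
    # HTTP 429: the server may suggest how long to back off.
    print(f"Rate limited, retry after {exc.retry_after or 60}s")
except AuthenticationException as exc:
    print(f"Check the API key: {exc}")
except NetworkException as exc:
    print(f"Network problem after {exc.retry_count} retries: {exc}")
except TTSException as exc:
    # Catch-all for anything else raised by the package.
    print(f"[{exc.code}] {exc.message}")
```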
ttsfm/models.py ADDED
@@ -0,0 +1,283 @@
1
+ """
2
+ Data models and types for the TTSFM package.
3
+
4
+ This module defines the core data structures used throughout the package,
5
+ including request/response models, enums, and error types.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Optional, Dict, Any, Union
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+
13
+
14
+ class Voice(str, Enum):
15
+ """Available voice options for TTS generation."""
16
+ ALLOY = "alloy"
17
+ ASH = "ash"
18
+ BALLAD = "ballad"
19
+ CORAL = "coral"
20
+ ECHO = "echo"
21
+ FABLE = "fable"
22
+ NOVA = "nova"
23
+ ONYX = "onyx"
24
+ SAGE = "sage"
25
+ SHIMMER = "shimmer"
26
+ VERSE = "verse"
27
+
28
+
29
+ class AudioFormat(str, Enum):
30
+ """Supported audio output formats."""
31
+ MP3 = "mp3"
32
+ WAV = "wav"
33
+ OPUS = "opus"
34
+ AAC = "aac"
35
+ FLAC = "flac"
36
+ PCM = "pcm"
37
+
38
+
39
+ @dataclass
40
+ class TTSRequest:
41
+ """
42
+ Request model for TTS generation.
43
+
44
+ Attributes:
45
+ input: Text to convert to speech
46
+ voice: Voice to use for generation
47
+ response_format: Audio format for output
48
+ instructions: Optional instructions for voice modulation
49
+ model: Model to use (for OpenAI compatibility, usually ignored)
50
+ speed: Speech speed (for OpenAI compatibility, usually ignored)
51
+ max_length: Maximum allowed text length (default: 4096 characters)
52
+ validate_length: Whether to validate text length (default: True)
53
+ """
54
+ input: str
55
+ voice: Union[Voice, str] = Voice.ALLOY
56
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3
57
+ instructions: Optional[str] = None
58
+ model: Optional[str] = None
59
+ speed: Optional[float] = None
60
+ max_length: int = 4096
61
+ validate_length: bool = True
62
+
63
+ def __post_init__(self):
64
+ """Validate and normalize fields after initialization."""
65
+ # Ensure voice is a valid Voice enum
66
+ if isinstance(self.voice, str):
67
+ try:
68
+ self.voice = Voice(self.voice.lower())
69
+ except ValueError:
70
+ raise ValueError(f"Invalid voice: {self.voice}. Must be one of {list(Voice)}")
71
+
72
+ # Ensure response_format is a valid AudioFormat enum
73
+ if isinstance(self.response_format, str):
74
+ try:
75
+ self.response_format = AudioFormat(self.response_format.lower())
76
+ except ValueError:
77
+ raise ValueError(f"Invalid format: {self.response_format}. Must be one of {list(AudioFormat)}")
78
+
79
+ # Validate input text
80
+ if not self.input or not self.input.strip():
81
+ raise ValueError("Input text cannot be empty")
82
+
83
+ # Validate text length if enabled
84
+ if self.validate_length:
85
+ text_length = len(self.input)
86
+ if text_length > self.max_length:
87
+ raise ValueError(
88
+ f"Input text is too long ({text_length} characters). "
89
+ f"Maximum allowed length is {self.max_length} characters. "
90
+ f"Consider splitting your text into smaller chunks or disable "
91
+ f"length validation with validate_length=False."
92
+ )
93
+
94
+ # Validate max_length parameter
95
+ if self.max_length <= 0:
96
+ raise ValueError("max_length must be a positive integer")
97
+
98
+ # Validate speed if provided
99
+ if self.speed is not None and (self.speed < 0.25 or self.speed > 4.0):
100
+ raise ValueError("Speed must be between 0.25 and 4.0")
101
+
102
+ def to_dict(self) -> Dict[str, Any]:
103
+ """Convert request to dictionary for API calls."""
104
+ data = {
105
+ "input": self.input,
106
+ "voice": self.voice.value if isinstance(self.voice, Voice) else self.voice,
107
+ "response_format": self.response_format.value if isinstance(self.response_format, AudioFormat) else self.response_format
108
+ }
109
+
110
+ if self.instructions:
111
+ data["instructions"] = self.instructions
112
+
113
+ if self.model:
114
+ data["model"] = self.model
115
+
116
+ if self.speed is not None:
117
+ data["speed"] = self.speed
118
+
119
+ return data
120
+
121
+
122
+ @dataclass
123
+ class TTSResponse:
124
+ """
125
+ Response model for TTS generation.
126
+
127
+ Attributes:
128
+ audio_data: Generated audio as bytes
129
+ content_type: MIME type of the audio data
130
+ format: Audio format used
131
+ size: Size of audio data in bytes
132
+ duration: Estimated duration in seconds (if available)
133
+ metadata: Additional response metadata
134
+ """
135
+ audio_data: bytes
136
+ content_type: str
137
+ format: AudioFormat
138
+ size: int
139
+ duration: Optional[float] = None
140
+ metadata: Optional[Dict[str, Any]] = None
141
+
142
+ def __post_init__(self):
143
+ """Calculate derived fields after initialization."""
144
+ if self.size is None:
145
+ self.size = len(self.audio_data)
146
+
147
+ def save_to_file(self, filename: str) -> str:
148
+ """
149
+ Save audio data to a file.
150
+
151
+ Args:
152
+ filename: Target filename (extension will be added if missing)
153
+
154
+ Returns:
155
+ str: Final filename used
156
+ """
157
+ import os
158
+
159
+ # Use the actual returned format for the extension, not any requested format
160
+ expected_extension = f".{self.format.value}"
161
+
162
+ # Check if filename already has the correct extension
163
+ if filename.endswith(expected_extension):
164
+ final_filename = filename
165
+ else:
166
+ # Remove any existing extension and add the correct one
167
+ base_name = filename
168
+ # Remove common audio extensions if present
169
+ for ext in ['.mp3', '.wav', '.opus', '.aac', '.flac', '.pcm']:
170
+ if base_name.endswith(ext):
171
+ base_name = base_name[:-len(ext)]
172
+ break
173
+ final_filename = f"{base_name}{expected_extension}"
174
+
175
+ # Create directory if it doesn't exist
176
+ os.makedirs(os.path.dirname(final_filename) if os.path.dirname(final_filename) else ".", exist_ok=True)
177
+
178
+ # Write audio data
179
+ with open(final_filename, "wb") as f:
180
+ f.write(self.audio_data)
181
+
182
+ return final_filename
183
+
184
+
185
+ @dataclass
186
+ class TTSError:
187
+ """
188
+ Error information from TTS API.
189
+
190
+ Attributes:
191
+ code: Error code
192
+ message: Human-readable error message
193
+ type: Error type/category
194
+ details: Additional error details
195
+ timestamp: When the error occurred
196
+ """
197
+ code: str
198
+ message: str
199
+ type: Optional[str] = None
200
+ details: Optional[Dict[str, Any]] = None
201
+ timestamp: Optional[datetime] = None
202
+
203
+ def __post_init__(self):
204
+ """Set timestamp if not provided."""
205
+ if self.timestamp is None:
206
+ self.timestamp = datetime.now()
207
+
208
+
209
+ @dataclass
210
+ class APIError(TTSError):
211
+ """API-specific error information."""
212
+ status_code: int = 500
213
+ headers: Optional[Dict[str, str]] = None
214
+
215
+
216
+ @dataclass
217
+ class NetworkError(TTSError):
218
+ """Network-related error information."""
219
+ timeout: Optional[float] = None
220
+ retry_count: int = 0
221
+
222
+
223
+ @dataclass
224
+ class ValidationError(TTSError):
225
+ """Validation error information."""
226
+ field: Optional[str] = None
227
+ value: Optional[Any] = None
228
+
229
+
230
+ # Content type mappings for audio formats
231
+ CONTENT_TYPE_MAP = {
232
+ AudioFormat.MP3: "audio/mpeg",
233
+ AudioFormat.OPUS: "audio/opus",
234
+ AudioFormat.AAC: "audio/aac",
235
+ AudioFormat.FLAC: "audio/flac",
236
+ AudioFormat.WAV: "audio/wav",
237
+ AudioFormat.PCM: "audio/pcm"
238
+ }
239
+
240
+ # Reverse mapping for content type to format
241
+ FORMAT_FROM_CONTENT_TYPE = {v: k for k, v in CONTENT_TYPE_MAP.items()}
242
+
243
+
244
+ def get_content_type(format: Union[AudioFormat, str]) -> str:
245
+ """Get MIME content type for audio format."""
246
+ if isinstance(format, str):
247
+ format = AudioFormat(format.lower())
248
+ return CONTENT_TYPE_MAP.get(format, "audio/mpeg")
249
+
250
+
251
+ def get_format_from_content_type(content_type: str) -> AudioFormat:
252
+ """Get audio format from MIME content type."""
253
+ return FORMAT_FROM_CONTENT_TYPE.get(content_type, AudioFormat.MP3)
254
+
255
+
256
+ def get_supported_format(requested_format: AudioFormat) -> AudioFormat:
257
+ """
258
+ Map requested format to supported format.
259
+
260
+ Args:
261
+ requested_format: The requested audio format
262
+
263
+ Returns:
264
+ AudioFormat: MP3 or WAV (the supported formats)
265
+ """
266
+ if requested_format == AudioFormat.MP3:
267
+ return AudioFormat.MP3
268
+ else:
269
+ # All other formats (WAV, OPUS, AAC, FLAC, PCM) return WAV
270
+ return AudioFormat.WAV
271
+
272
+
273
+ def maps_to_wav(format_value: str) -> bool:
274
+ """
275
+ Check if a format maps to WAV.
276
+
277
+ Args:
278
+ format_value: Format string to check
279
+
280
+ Returns:
281
+ bool: True if the format maps to WAV
282
+ """
283
+ return format_value.lower() in ['wav', 'opus', 'aac', 'flac', 'pcm']
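A short sketch exercising the request validation and format-mapping helpers defined above; every name comes from this file.

```python
from ttsfm.models import (
    TTSRequest,
    Voice,
    AudioFormat,
    get_supported_format,
    get_content_type,
    maps_to_wav,
)

# String values are normalized to enums in __post_init__.
req = TTSRequest(input="Hello!", voice="nova", response_format="flac")
assert req.voice is Voice.NOVA
assert req.response_format is AudioFormat.FLAC

# Only MP3 and WAV are actually produced; every other format maps to WAV.
assert get_supported_format(AudioFormat.FLAC) is AudioFormat.WAV
assert maps_to_wav("flac")
assert get_content_type(AudioFormat.WAV) == "audio/wav"

# Length validation raises ValueError for oversized input unless disabled.
try:
    TTSRequest(input="x" * 5000)
except ValueError as exc:
    print(exc)

TTSRequest(input="x" * 5000, validate_length=False)  # accepted
```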
ttsfm/utils.py ADDED
@@ -0,0 +1,421 @@
1
+ """
2
+ Utility functions for the TTSFM package.
3
+
4
+ This module provides common utility functions used throughout the package,
5
+ including HTTP helpers, validation utilities, and configuration management.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ import time
11
+ import random
12
+ import logging
13
+ from typing import Dict, Any, Optional, Union, List
14
+ from urllib.parse import urljoin, urlparse
15
+
16
+
17
+ # Configure logging
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ def get_user_agent() -> str:
22
+ """
23
+ Generate a realistic User-Agent string.
24
+
25
+ Returns:
26
+ str: User-Agent string for HTTP requests
27
+ """
28
+ try:
29
+ from fake_useragent import UserAgent
30
+ ua = UserAgent()
31
+ return ua.random
32
+ except ImportError:
33
+ # Fallback if fake_useragent is not available
34
+ return "TTSFM-Client/3.0.0 (Python)"
35
+
36
+
37
+ def get_realistic_headers() -> Dict[str, str]:
38
+ """
39
+ Generate realistic HTTP headers for requests.
40
+
41
+ Returns:
42
+ Dict[str, str]: HTTP headers dictionary
43
+ """
44
+ user_agent = get_user_agent()
45
+
46
+ headers = {
47
+ "Accept": "application/json, audio/*",
48
+ "Accept-Encoding": "gzip, deflate, br",
49
+ "Accept-Language": random.choice(["en-US,en;q=0.9", "en-GB,en;q=0.8", "en-CA,en;q=0.7"]),
50
+ "Cache-Control": "no-cache",
51
+ "DNT": "1",
52
+ "Pragma": "no-cache",
53
+ "User-Agent": user_agent,
54
+ "X-Requested-With": "XMLHttpRequest",
55
+ }
56
+
57
+ # Add browser-specific headers for Chromium-based browsers
58
+ if any(browser in user_agent.lower() for browser in ['chrome', 'edge', 'chromium']):
59
+ version_match = re.search(r'(?:Chrome|Edge|Chromium)/(\d+)', user_agent)
60
+ major_version = version_match.group(1) if version_match else "121"
61
+
62
+ brands = []
63
+ if 'google chrome' in user_agent.lower():
64
+ brands.extend([
65
+ f'"Google Chrome";v="{major_version}"',
66
+ f'"Chromium";v="{major_version}"',
67
+ '"Not A(Brand";v="99"'
68
+ ])
69
+ elif 'microsoft edge' in user_agent.lower():
70
+ brands.extend([
71
+ f'"Microsoft Edge";v="{major_version}"',
72
+ f'"Chromium";v="{major_version}"',
73
+ '"Not A(Brand";v="99"'
74
+ ])
75
+ else:
76
+ brands.extend([
77
+ f'"Chromium";v="{major_version}"',
78
+ '"Not A(Brand";v="8"'
79
+ ])
80
+
81
+ headers.update({
82
+ "Sec-Ch-Ua": ", ".join(brands),
83
+ "Sec-Ch-Ua-Mobile": "?0",
84
+ "Sec-Ch-Ua-Platform": random.choice(['"Windows"', '"macOS"', '"Linux"']),
85
+ "Sec-Fetch-Dest": "empty",
86
+ "Sec-Fetch-Mode": "cors",
87
+ "Sec-Fetch-Site": "same-origin"
88
+ })
89
+
90
+ # Randomly add some optional headers
91
+ if random.random() < 0.5:
92
+ headers["Upgrade-Insecure-Requests"] = "1"
93
+
94
+ return headers
95
+
96
+
97
+ def validate_text_length(text: str, max_length: int = 4096, raise_error: bool = True) -> bool:
98
+ """
99
+ Validate text length against maximum allowed characters.
100
+
101
+ Args:
102
+ text: Text to validate
103
+ max_length: Maximum allowed length in characters
104
+ raise_error: Whether to raise an exception if validation fails
105
+
106
+ Returns:
107
+ bool: True if text is within limits, False otherwise
108
+
109
+ Raises:
110
+ ValueError: If text exceeds max_length and raise_error is True
111
+ """
112
+ if not text:
113
+ return True
114
+
115
+ text_length = len(text)
116
+
117
+ if text_length > max_length:
118
+ if raise_error:
119
+ raise ValueError(
120
+ f"Text is too long ({text_length} characters). "
121
+ f"Maximum allowed length is {max_length} characters. "
122
+ f"TTS models typically support up to 4096 characters per request."
123
+ )
124
+ return False
125
+
126
+ return True
127
+
128
+
129
+ def split_text_by_length(text: str, max_length: int = 4096, preserve_words: bool = True) -> List[str]:
130
+ """
131
+ Split text into chunks that don't exceed the maximum length.
132
+
133
+ Args:
134
+ text: Text to split
135
+ max_length: Maximum length per chunk
136
+ preserve_words: Whether to avoid splitting words
137
+
138
+ Returns:
139
+ List[str]: List of text chunks
140
+ """
141
+ if not text:
142
+ return []
143
+
144
+ if len(text) <= max_length:
145
+ return [text]
146
+
147
+ chunks = []
148
+
149
+ if preserve_words:
150
+ # Split by sentences first, then by words if needed
151
+ sentences = re.split(r'[.!?]+', text)
152
+ current_chunk = ""
153
+
154
+ for sentence in sentences:
155
+ sentence = sentence.strip()
156
+ if not sentence:
157
+ continue
158
+
159
+ # Add sentence ending punctuation back
160
+ if not sentence.endswith(('.', '!', '?')):
161
+ sentence += '.'
162
+
163
+ # Check if adding this sentence would exceed the limit
164
+ test_chunk = current_chunk + (" " if current_chunk else "") + sentence
165
+
166
+ if len(test_chunk) <= max_length:
167
+ current_chunk = test_chunk
168
+ else:
169
+ # Save current chunk if it has content
170
+ if current_chunk:
171
+ chunks.append(current_chunk.strip())
172
+
173
+ # If single sentence is too long, split by words
174
+ if len(sentence) > max_length:
175
+ word_chunks = _split_by_words(sentence, max_length)
176
+ chunks.extend(word_chunks)
177
+ current_chunk = ""
178
+ else:
179
+ current_chunk = sentence
180
+
181
+ # Add remaining chunk
182
+ if current_chunk:
183
+ chunks.append(current_chunk.strip())
184
+ else:
185
+ # Simple character-based splitting
186
+ for i in range(0, len(text), max_length):
187
+ chunks.append(text[i:i + max_length])
188
+
189
+ return [chunk for chunk in chunks if chunk.strip()]
190
+
191
+
192
+ def _split_by_words(text: str, max_length: int) -> List[str]:
193
+ """
194
+ Split text by words when sentences are too long.
195
+
196
+ Args:
197
+ text: Text to split
198
+ max_length: Maximum length per chunk
199
+
200
+ Returns:
201
+ List[str]: List of word-based chunks
202
+ """
203
+ words = text.split()
204
+ chunks = []
205
+ current_chunk = ""
206
+
207
+ for word in words:
208
+ test_chunk = current_chunk + (" " if current_chunk else "") + word
209
+
210
+ if len(test_chunk) <= max_length:
211
+ current_chunk = test_chunk
212
+ else:
213
+ if current_chunk:
214
+ chunks.append(current_chunk)
215
+
216
+ # If single word is too long, split it
217
+ if len(word) > max_length:
218
+ for i in range(0, len(word), max_length):
219
+ chunks.append(word[i:i + max_length])
220
+ current_chunk = ""
221
+ else:
222
+ current_chunk = word
223
+
224
+ if current_chunk:
225
+ chunks.append(current_chunk)
226
+
227
+ return chunks
228
+
229
+
230
+ def sanitize_text(text: str) -> str:
231
+ """
232
+ Sanitize input text for TTS processing.
233
+
234
+ Args:
235
+ text: Input text to sanitize
236
+
237
+ Returns:
238
+ str: Sanitized text
239
+ """
240
+ if not text:
241
+ return ""
242
+
243
+ # Remove HTML tags
244
+ text = re.sub(r'<[^>]+>', '', text)
245
+
246
+ # Remove script tags and content
247
+ text = re.sub(r'<script.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
248
+
249
+ # Remove potentially dangerous characters
250
+ text = re.sub(r'[<>"\']', '', text)
251
+
252
+ # Normalize whitespace
253
+ text = re.sub(r'\s+', ' ', text)
254
+
255
+ return text.strip()
256
+
257
+
258
+ def validate_url(url: str) -> bool:
259
+ """
260
+ Validate if a URL is properly formatted.
261
+
262
+ Args:
263
+ url: URL to validate
264
+
265
+ Returns:
266
+ bool: True if URL is valid, False otherwise
267
+ """
268
+ try:
269
+ result = urlparse(url)
270
+ return all([result.scheme, result.netloc])
271
+ except Exception:
272
+ return False
273
+
274
+
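A couple of illustrative checks; only a value with both a scheme and a network location counts as valid:

print(validate_url("https://example.com/api"))   # True
print(validate_url("example.com/api"))           # False (no scheme)
print(validate_url("not a url"))                 # False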
275
+ def build_url(base_url: str, path: str) -> str:
276
+ """
277
+ Build a complete URL from base URL and path.
278
+
279
+ Args:
280
+ base_url: Base URL
281
+ path: Path to append
282
+
283
+ Returns:
284
+ str: Complete URL
285
+ """
286
+ # Ensure base_url ends with /
287
+ if not base_url.endswith('/'):
288
+ base_url += '/'
289
+
290
+ # Ensure path doesn't start with /
291
+ if path.startswith('/'):
292
+ path = path[1:]
293
+
294
+ return urljoin(base_url, path)
295
+
296
+
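Two illustrative calls (the endpoint path is made up for the example); leading and trailing slashes are normalized either way:

print(build_url("https://example.com/api", "v1/audio/speech"))    # https://example.com/api/v1/audio/speech
print(build_url("https://example.com/api/", "/v1/audio/speech"))  # same result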
297
+ def get_random_delay(min_delay: float = 1.0, max_delay: float = 5.0) -> float:
298
+ """
299
+ Get a random delay with jitter for rate limiting.
300
+
301
+ Args:
302
+ min_delay: Minimum delay in seconds
303
+ max_delay: Maximum delay in seconds
304
+
305
+ Returns:
306
+ float: Random delay in seconds
307
+ """
308
+ base_delay = random.uniform(min_delay, max_delay)
309
+ jitter = random.uniform(0.1, 0.5)
310
+ return base_delay + jitter
311
+
312
+
313
+ def exponential_backoff(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
314
+ """
315
+ Calculate exponential backoff delay.
316
+
317
+ Args:
318
+ attempt: Attempt number (0-based)
319
+ base_delay: Base delay in seconds
320
+ max_delay: Maximum delay in seconds
321
+
322
+ Returns:
323
+ float: Delay in seconds
324
+ """
325
+ delay = base_delay * (2 ** attempt)
326
+ jitter = random.uniform(0.1, 0.3) * delay
327
+ return min(delay + jitter, max_delay)
328
+
329
+
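A minimal retry sketch built on the two delay helpers above; the wrapper name, the callable, and the attempt count are hypothetical and not part of the project API:

import time

def fetch_with_retries(do_request, max_attempts: int = 5):
    """Hypothetical wrapper: retry a flaky callable with exponential backoff."""
    for attempt in range(max_attempts):
        try:
            return do_request()
        except Exception:
            if attempt == max_attempts - 1:
                raise
            # Sleeps roughly 1 s, 2 s, 4 s, ... plus jitter, capped at 60 s.
            time.sleep(exponential_backoff(attempt))

# For polite pacing between unrelated requests, get_random_delay() can be used instead:
# time.sleep(get_random_delay(1.0, 3.0))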
330
+ def load_config_from_env(prefix: str = "TTSFM_") -> Dict[str, Any]:
331
+ """
332
+ Load configuration from environment variables.
333
+
334
+ Args:
335
+ prefix: Prefix for environment variables
336
+
337
+ Returns:
338
+ Dict[str, Any]: Configuration dictionary
339
+ """
340
+ config = {}
341
+
342
+ for key, value in os.environ.items():
343
+ if key.startswith(prefix):
344
+ config_key = key[len(prefix):].lower()
345
+
346
+ # Try to convert to appropriate type
347
+ if value.lower() in ('true', 'false'):
348
+ config[config_key] = value.lower() == 'true'
349
+ elif value.isdigit():
350
+ config[config_key] = int(value)
351
+ elif value.count('.') == 1 and value.replace('.', '', 1).isdigit():
352
+ config[config_key] = float(value)
353
+ else:
354
+ config[config_key] = value
355
+
356
+ return config
357
+
358
+
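A sketch of the type coercion, assuming it runs in the same module; the variable names below are illustrative. Note that values with more than one dot, such as IP addresses, are kept as strings:

import os

os.environ["TTSFM_VERIFY_SSL"] = "true"
os.environ["TTSFM_PORT"] = "7000"
os.environ["TTSFM_TIMEOUT"] = "30.5"
os.environ["TTSFM_HOST"] = "0.0.0.0"

cfg = load_config_from_env()
# Expected entries: verify_ssl=True (bool), port=7000 (int),
# timeout=30.5 (float), host="0.0.0.0" (left as a string).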
359
+ def setup_logging(level: Union[str, int] = logging.INFO, format_string: Optional[str] = None) -> None:
360
+ """
361
+ Setup logging configuration for the package.
362
+
363
+ Args:
364
+ level: Logging level
365
+ format_string: Custom format string
366
+ """
367
+ if format_string is None:
368
+ format_string = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
369
+
370
+ logging.basicConfig(
371
+ level=level,
372
+ format=format_string,
373
+ handlers=[logging.StreamHandler()]
374
+ )
375
+
376
+
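A typical call at application start-up; the logger name is an assumption for the example:

import logging

setup_logging(level=logging.DEBUG)
logging.getLogger("ttsfm").debug("logging configured")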
377
+ def estimate_audio_duration(text: str, words_per_minute: float = 150.0) -> float:
378
+ """
379
+ Estimate audio duration based on text length.
380
+
381
+ Args:
382
+ text: Input text
383
+ words_per_minute: Average speaking rate
384
+
385
+ Returns:
386
+ float: Estimated duration in seconds
387
+ """
388
+ if not text:
389
+ return 0.0
390
+
391
+ # Count words (simple whitespace split)
392
+ word_count = len(text.split())
393
+
394
+ # Calculate duration in seconds
395
+ duration = (word_count / words_per_minute) * 60.0
396
+
397
+ # Add some buffer for pauses and processing
398
+ return duration * 1.1
399
+
400
+
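A quick worked example of the estimate: 300 words at the default 150 words per minute is two minutes of speech, and the 10% buffer raises it to about 132 seconds.

text = "word " * 300                    # 300 placeholder words
print(estimate_audio_duration(text))    # (300 / 150) * 60 * 1.1, approximately 132.0 seconds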
401
+ def format_file_size(size_bytes: int) -> str:
402
+ """
403
+ Format file size in human-readable format.
404
+
405
+ Args:
406
+ size_bytes: Size in bytes
407
+
408
+ Returns:
409
+ str: Formatted size string
410
+ """
411
+ if size_bytes == 0:
412
+ return "0 B"
413
+
414
+ size_names = ["B", "KB", "MB", "GB"]
415
+ i = 0
416
+
417
+ while size_bytes >= 1024 and i < len(size_names) - 1:
418
+ size_bytes /= 1024.0
419
+ i += 1
420
+
421
+ return f"{size_bytes:.1f} {size_names[i]}"