wasmdashai committed on
Commit
5d39bc0
·
verified ·
1 Parent(s): b32aebd

Add 2 files

Browse files
Files changed (2) hide show
  1. index.html +334 -38
  2. prompts.txt +1 -0
index.html CHANGED
@@ -1,41 +1,337 @@
1
  <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>My app</title>
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <meta charset="utf-8">
 
 
7
  <style>
8
- body {
9
- display: flex;
10
- justify-content: center;
11
- align-items: center;
12
- overflow: hidden;
13
- height: 100dvh;
14
- font-family: "Arial", sans-serif;
15
- text-align: center;
16
- }
17
- .arrow {
18
- position: absolute;
19
- bottom: 32px;
20
- left: 0px;
21
- width: 100px;
22
- transform: rotate(30deg);
23
- }
24
- h1 {
25
- font-size: 50px;
26
- }
27
- h1 span {
28
- color: #acacac;
29
- font-size: 32px;
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  </style>
32
- </head>
33
- <body>
34
- <h1>
35
- <span>I'm ready to work,</span><br />
36
- Ask me anything.
37
- </h1>
38
- <img src="https://enzostvs-deepsite.hf.space/arrow.svg" class="arrow" />
39
- <script></script>
40
- <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=wasmdashai/mywap" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
41
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>LAHJA AI - Advanced Text-to-Speech</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
  <style>
10
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
11
+
12
+ body {
13
+ font-family: 'Inter', sans-serif;
14
+ background-color: #f8fafc;
15
+ }
16
+
17
+ .gradient-bg {
18
+ background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 50%, #d946ef 100%);
19
+ }
20
+
21
+ .textarea-shadow {
22
+ box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.1), 0 2px 4px -1px rgba(79, 70, 229, 0.06);
23
+ }
24
+
25
+ .waveform {
26
+ height: 60px;
27
+ background: linear-gradient(90deg, #6366f1, #8b5cf6, #d946ef);
28
+ opacity: 0.7;
29
+ position: relative;
30
+ overflow: hidden;
31
+ }
32
+
33
+ .waveform::before {
34
+ content: "";
35
+ position: absolute;
36
+ top: 0;
37
+ left: 0;
38
+ right: 0;
39
+ bottom: 0;
40
+ background: linear-gradient(
41
+ 90deg,
42
+ transparent,
43
+ rgba(255, 255, 255, 0.2),
44
+ transparent
45
+ );
46
+ animation: wave 1.5s linear infinite;
47
+ }
48
+
49
+ @keyframes wave {
50
+ 0% {
51
+ transform: translateX(-100%);
52
+ }
53
+ 100% {
54
+ transform: translateX(100%);
55
+ }
56
+ }
57
+
58
+ .audio-player {
59
+ transition: all 0.3s ease;
60
+ }
61
+
62
+ .audio-player:hover {
63
+ transform: translateY(-2px);
64
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
65
+ }
66
  </style>
67
+ </head>
68
+ <body>
69
+ <div class="min-h-screen flex flex-col">
70
+ <!-- Header -->
71
+ <header class="gradient-bg text-white py-6 shadow-lg">
72
+ <div class="container mx-auto px-4">
73
+ <div class="flex justify-between items-center">
74
+ <div class="flex items-center space-x-2">
75
+ <i class="fas fa-wave-square text-2xl"></i>
76
+ <h1 class="text-2xl font-bold">LAHJA AI</h1>
77
+ </div>
78
+ <div class="hidden md:flex items-center space-x-4">
79
+ <span class="text-sm font-medium bg-white/20 px-3 py-1 rounded-full">VITS Architecture</span>
80
+ <span class="text-sm font-medium bg-white/20 px-3 py-1 rounded-full">Transformers</span>
81
+ </div>
82
+ </div>
83
+ <p class="mt-2 text-sm opacity-80 max-w-2xl">
84
+ Advanced AI-powered text-to-speech with accent-aware synthesis using cutting-edge VITS architecture and transformer models.
85
+ </p>
86
+ </div>
87
+ </header>
88
+
89
+ <!-- Main Content -->
90
+ <main class="flex-grow container mx-auto px-4 py-8">
91
+ <div class="max-w-4xl mx-auto">
92
+ <div class="bg-white rounded-xl shadow-lg overflow-hidden">
93
+ <!-- Input Section -->
94
+ <div class="p-6 border-b border-gray-100">
95
+ <h2 id="textInputHeading" class="text-xl font-semibold text-gray-800 mb-4">Text Input</h2>
96
+ <div class="relative"> <!-- positioning context for the counter / clear-button overlay below -->
97
+ <textarea
98
+ id="textInput" aria-labelledby="textInputHeading"
99
+ class="w-full h-48 px-4 py-3 border border-gray-200 rounded-lg textarea-shadow focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 outline-none transition duration-200 resize-none"
100
+ placeholder="Enter the text you want to convert to speech..."
101
+ ></textarea>
102
+ <div class="absolute bottom-3 right-3 flex items-center space-x-2">
103
+ <span id="charCount" class="text-xs text-gray-500">0 characters</span>
104
+ <button id="clearBtn" type="button" class="text-gray-400 hover:text-gray-600 transition" aria-label="Clear text"> <!-- icon-only: the name comes from aria-label -->
105
+ <i class="fas fa-times" aria-hidden="true"></i>
106
+ </button>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="mt-6 flex flex-col sm:flex-row justify-between items-center space-y-4 sm:space-y-0">
111
+ <div class="flex items-center space-x-4">
112
+ <div class="flex items-center">
113
+ <label for="voiceSelect" class="mr-2 text-sm font-medium text-gray-700">Voice:</label>
114
+ <select id="voiceSelect" class="border border-gray-200 rounded-md px-3 py-1 text-sm focus:ring-indigo-500 focus:border-indigo-500 outline-none">
115
+ <option value="us">American English</option>
116
+ <option value="uk">British English</option>
117
+ <option value="au">Australian English</option>
118
+ <option value="in">Indian English</option>
119
+ </select>
120
+ </div>
121
+ <div class="flex items-center">
122
+ <label for="speedSelect" class="mr-2 text-sm font-medium text-gray-700">Speed:</label>
123
+ <select id="speedSelect" class="border border-gray-200 rounded-md px-3 py-1 text-sm focus:ring-indigo-500 focus:border-indigo-500 outline-none">
124
+ <option value="0.8">Slow</option>
125
+ <option value="1.0" selected>Normal</option>
126
+ <option value="1.2">Fast</option>
127
+ </select>
128
+ </div>
129
+ </div>
130
+
131
+ <button id="generateBtn" type="button" class="gradient-bg hover:opacity-90 text-white font-medium py-2 px-6 rounded-lg shadow-md transition duration-200 flex items-center"> <!-- explicit type: never acts as a submit button -->
132
+ <i class="fas fa-play-circle mr-2" aria-hidden="true"></i>
133
+ Generate Voice
134
+ </button>
135
+ </div>
136
+ </div>
137
+
138
+ <!-- Output Section -->
139
+ <div class="p-6">
140
+ <h2 class="text-xl font-semibold text-gray-800 mb-4">Generated Audio</h2>
141
+
142
+ <!-- Loading State -->
143
+ <div id="loadingState" class="hidden">
144
+ <div class="flex flex-col items-center justify-center py-8">
145
+ <div class="waveform w-full rounded-lg mb-4"></div>
146
+ <p class="text-gray-600 font-medium">Processing your request with LAHJA AI...</p>
147
+ <p class="text-sm text-gray-500 mt-1">This may take a few moments</p>
148
+ </div>
149
+ </div>
150
+
151
+ <!-- Audio Player: starts hidden; the page script reveals it once an audio source has been set -->
152
+ <div id="audioPlayerContainer" class="hidden">
153
+ <div class="audio-player bg-gradient-to-r from-indigo-50 to-purple-50 rounded-xl p-4 border border-gray-200">
154
+ <div class="flex items-center justify-between mb-3">
155
+ <div class="flex items-center space-x-3">
156
+ <i class="fas fa-headphones text-indigo-600 text-xl" aria-hidden="true"></i>
157
+ <div>
158
+ <h3 class="font-medium text-gray-800">Generated Speech</h3>
159
+ <p class="text-xs text-gray-500" id="audioInfo">American English • Normal speed</p>
160
+ </div>
161
+ </div>
162
+ <button id="downloadBtn" type="button" class="text-indigo-600 hover:text-indigo-800 transition" aria-label="Download audio"> <!-- icon-only: the name comes from aria-label -->
163
+ <i class="fas fa-download" aria-hidden="true"></i>
164
+ </button>
165
+ </div>
166
+ <audio id="audioPlayer" controls class="w-full"></audio>
167
+ </div>
168
+ </div>
169
+
170
+ <!-- Empty State -->
171
+ <div id="emptyState" class="flex flex-col items-center justify-center py-12 text-center">
172
+ <i class="fas fa-comment-dots text-4xl text-gray-300 mb-4"></i>
173
+ <h3 class="text-lg font-medium text-gray-700">No audio generated yet</h3>
174
+ <p class="text-gray-500 max-w-md mt-1">Enter some text above and click "Generate Voice" to create your speech.</p>
175
+ </div>
176
+ </div>
177
+ </div>
178
+
179
+ <!-- Features Section -->
180
+ <div class="mt-12 grid grid-cols-1 md:grid-cols-3 gap-6">
181
+ <div class="bg-white p-6 rounded-xl shadow-sm border border-gray-100">
182
+ <div class="text-indigo-600 mb-3">
183
+ <i class="fas fa-microchip text-2xl"></i>
184
+ </div>
185
+ <h3 class="font-semibold text-lg mb-2">VITS Architecture</h3>
186
+ <p class="text-gray-600 text-sm">
187
+ Our advanced VITS model synthesizes realistic audio waveforms directly from text with exceptional clarity and naturalness.
188
+ </p>
189
+ </div>
190
+ <div class="bg-white p-6 rounded-xl shadow-sm border border-gray-100">
191
+ <div class="text-purple-600 mb-3">
192
+ <i class="fas fa-language text-2xl"></i>
193
+ </div>
194
+ <h3 class="font-semibold text-lg mb-2">Accent-Aware</h3>
195
+ <p class="text-gray-600 text-sm">
196
+ Captures local vocal characteristics and intonation patterns for authentic regional speech synthesis.
197
+ </p>
198
+ </div>
199
+ <div class="bg-white p-6 rounded-xl shadow-sm border border-gray-100">
200
+ <div class="text-pink-600 mb-3">
201
+ <i class="fas fa-brain text-2xl"></i>
202
+ </div>
203
+ <h3 class="font-semibold text-lg mb-2">Transformer Models</h3>
204
+ <p class="text-gray-600 text-sm">
205
+ Deep linguistic analysis enables context-aware speech generation that reflects natural human expression.
206
+ </p>
207
+ </div>
208
+ </div>
209
+ </div>
210
+ </main>
211
+
212
+ <!-- Footer -->
213
+ <footer class="bg-gray-50 py-6 border-t border-gray-200">
214
+ <div class="container mx-auto px-4 text-center">
215
+ <p class="text-gray-500 text-sm">
216
+ &copy; 2023 LAHJA AI. Advanced text-to-speech powered by VITS architecture and transformer models.
217
+ </p>
218
+ </div>
219
+ </footer>
220
+ </div>
221
+
222
+ <script>
223
+ document.addEventListener('DOMContentLoaded', function() {
224
+ // DOM Elements
225
+ const textInput = document.getElementById('textInput');
226
+ const charCount = document.getElementById('charCount');
227
+ const clearBtn = document.getElementById('clearBtn');
228
+ const generateBtn = document.getElementById('generateBtn');
229
+ const voiceSelect = document.getElementById('voiceSelect');
230
+ const speedSelect = document.getElementById('speedSelect');
231
+ const loadingState = document.getElementById('loadingState');
232
+ const audioPlayerContainer = document.getElementById('audioPlayerContainer');
233
+ const emptyState = document.getElementById('emptyState');
234
+ const audioPlayer = document.getElementById('audioPlayer');
235
+ const downloadBtn = document.getElementById('downloadBtn');
236
+ const audioInfo = document.getElementById('audioInfo');
237
+
238
+ // Update character count
239
+ textInput.addEventListener('input', function() {
240
+ const count = textInput.value.length;
241
+ charCount.textContent = `${count} characters`;
242
+
243
+ if (count > 0) {
244
+ clearBtn.classList.remove('invisible');
245
+ } else {
246
+ clearBtn.classList.add('invisible');
247
+ }
248
+ });
249
+
250
+ // Clear text input
251
+ clearBtn.addEventListener('click', function() {
252
+ textInput.value = '';
253
+ charCount.textContent = '0 characters';
254
+ clearBtn.classList.add('invisible');
255
+ });
256
+
257
+ // Generate voice
258
+ generateBtn.addEventListener('click', function() {
259
+ const text = textInput.value.trim();
260
+ if (!text) {
261
+ alert('Please enter some text to convert to speech.');
262
+ return;
263
+ }
264
+
265
+ const voice = voiceSelect.value;
266
+ const speed = speedSelect.value;
267
+
268
+ // Show loading state
269
+ loadingState.classList.remove('hidden');
270
+ audioPlayerContainer.classList.add('hidden');
271
+ emptyState.classList.add('hidden');
272
+
273
+ // Simulate API call (in a real app, this would be a fetch or axios call)
274
+ simulateAPICall(text, voice, speed);
275
+ });
276
+
277
+ // Download audio
278
+ downloadBtn.addEventListener('click', function() {
279
+ if (audioPlayer.src) {
280
+ const a = document.createElement('a');
281
+ a.href = audioPlayer.src;
282
+ a.download = `lahja-ai-voice-${new Date().getTime()}.mp3`;
283
+ document.body.appendChild(a);
284
+ a.click();
285
+ document.body.removeChild(a);
286
+ }
287
+ });
288
+
289
+ // Simulate API call (replace with actual API call in production)
290
+ function simulateAPICall(text, voice, speed) {
291
+ console.log(`Making API call with text: "${text}", voice: ${voice}, speed: ${speed}`);
292
+
293
+ // Simulate network delay
294
+ setTimeout(() => {
295
+ // This is a simulation - in a real app, you would:
296
+ // 1. Make a POST request to your API endpoint
297
+ // 2. Handle the response with the audio URL
298
+ // 3. Set the audio player source
299
+
300
+ // For demo purposes, we'll use a placeholder audio
301
+ const voiceLabels = {
302
+ 'us': 'American English',
303
+ 'uk': 'British English',
304
+ 'au': 'Australian English',
305
+ 'in': 'Indian English'
306
+ };
307
+
308
+ const speedLabels = {
309
+ '0.8': 'Slow',
310
+ '1.0': 'Normal',
311
+ '1.2': 'Fast'
312
+ };
313
+
314
+ // Update audio info
315
+ audioInfo.textContent = `${voiceLabels[voice]} • ${speedLabels[speed]} speed`;
316
+
317
+ // Set audio source (in a real app, this would come from the API response)
318
+ audioPlayer.src = 'https://www.soundhelix.com/examples/mp3/SoundHelix-Song-1.mp3';
319
+
320
+ // Hide loading, show audio player
321
+ loadingState.classList.add('hidden');
322
+ audioPlayerContainer.classList.remove('hidden');
323
+
324
+ // Play audio automatically
325
+ setTimeout(() => {
326
+ audioPlayer.play().catch(e => console.log('Autoplay prevented:', e));
327
+ }, 300);
328
+
329
+ }, 2000);
330
+ }
331
+
332
+ // Initialize
333
+ textInput.dispatchEvent(new Event('input'));
334
+ });
335
+ </script>
336
+ <!-- DeepSite attribution badge, fixed to the bottom-left corner; its links open in a new tab --> <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=wasmdashai/mywap" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">Remix</a></p></body>
337
+ </html>
prompts.txt CHANGED
@@ -0,0 +1 @@
 
 
1
+ Prompt (English): "Create a professional and responsive web page using HTML and Tailwind CSS for a text-to-speech application. The page should include: a textarea for users to input text; a button labeled 'Generate Voice'; a loading message while the audio is being generated; an audio player to play the generated speech; and clean, modern styling with good spacing and readable fonts. Include JavaScript to handle API calls (POST request) and set the audio source dynamically based on the response. Assume the API returns a JSON object with an audio_url field." LAHJA AI is an advanced framework that leverages cutting-edge artificial intelligence techniques to generate high-quality speech from text. At its core, it utilizes the VITS architecture, a powerful model that synthesizes realistic audio waveforms directly from textual input. All models within LAHJA AI integrate transformers for deep linguistic analysis, enabling the generation of speech that reflects the local vocal characteristics and intonation patterns of each specific accent or dialect. This makes LAHJA AI particularly effective for applications requiring natural-sounding, accent-aware speech synthesis.