saimanoj1605 committed on
Commit
d39bedc
·
verified ·
1 Parent(s): 5af839b

create app.py

Browse files
Files changed (1) hide show
  1. app.py +349 -0
app.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from gtts import gTTS
5
+ import io
6
+ import tempfile
7
+ import os
8
+ import json
9
+
10
# Inline configuration (stands in for a separate config.py module).
MODEL_CONFIG = {
    # UI-facing model key -> Hugging Face Hub repository id.
    "models": {
        "granite-3b": "ibm-granite/granite-3b-code-base",
        "granite-8b": "ibm-granite/granite-8b-code-base",
    },
    # Keyword arguments forwarded to model.generate().
    "generation_params": {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        # Placeholder: rewrite_text_with_granite() overwrites this on a copy
        # with the tokenizer's pad token id before generating.
        "pad_token_id": None,
    },
}

# Text-to-speech settings. Only "engine" is read by the code visible in this
# file (it is displayed in the System Info panel).
TTS_CONFIG = {
    "engine": "gtts",
    "voice_speed": 150,
    "voice_volume": 0.9,
}

# Instruction prefix placed at the start of the rewrite prompt, keyed by the
# tone name offered in the UI.
TONE_PROMPTS = {
    "Neutral": "Rewrite the following text in a clear, neutral tone suitable for audiobook narration:",
    "Suspenseful": "Rewrite the following text with suspenseful, engaging language that builds tension:",
    "Inspiring": "Rewrite the following text in an inspiring, motivational tone that uplifts the reader:",
}

# Module-level model state, populated by load_granite_model().
model = None
tokenizer = None
model_loaded = False
40
+
41
def load_granite_model(model_name="granite-3b"):
    """Load an IBM Granite model and its tokenizer into the module globals.

    Args:
        model_name: Key into ``MODEL_CONFIG["models"]`` selecting the
            checkpoint to load.

    Returns:
        A status string for the UI: a success message, or an error message
        describing why loading failed. Failures are reported through the
        return value rather than raised.
    """
    global model, tokenizer, model_loaded

    model_id = MODEL_CONFIG["models"].get(model_name)
    if model_id is None:
        # An unknown key used to raise KeyError before the try block; report
        # it like any other load failure and leave the current state alone.
        return f"❌ Error loading model: unknown model '{model_name}'"

    use_cuda = torch.cuda.is_available()

    try:
        # Load into locals first so a failure part-way through cannot leave
        # the globals holding a new tokenizer next to an old model.
        new_tokenizer = AutoTokenizer.from_pretrained(model_id)
        if new_tokenizer.pad_token is None:
            new_tokenizer.pad_token = new_tokenizer.eos_token

        new_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            # Half precision on GPU, full precision on CPU.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
            # NOTE(review): trust_remote_code allows code shipped in the hub
            # repository to run locally — confirm it is actually required
            # for these checkpoints.
            trust_remote_code=True,
        )
    except Exception as e:
        # Drop any previously loaded objects so the state matches the
        # "not loaded" flag reported to the UI.
        model = None
        tokenizer = None
        model_loaded = False
        return f"❌ Error loading model: {str(e)}"

    model = new_model
    tokenizer = new_tokenizer
    model_loaded = True
    return "✅ Model loaded successfully!"
66
+
67
def rewrite_text_with_granite(text, tone):
    """Rewrite ``text`` in the requested tone using the loaded Granite model.

    Args:
        text: The passage to rewrite.
        tone: Key into ``TONE_PROMPTS`` selecting the instruction prefix.

    Returns:
        The rewritten text. Returns ``text`` unchanged when no model is
        loaded or the model produces nothing, and an
        ``"Error rewriting text: ..."`` string when generation fails.
    """
    if not model_loaded or model is None or tokenizer is None:
        return text

    try:
        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\nRewritten text:"

        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
        )
        # The model may live on a GPU (device_map="auto"); the encoded prompt
        # must be on the same device before calling generate().
        inputs = inputs.to(model.device)

        # Copy so the shared config keeps its None placeholder.
        generation_params = MODEL_CONFIG["generation_params"].copy()
        generation_params["pad_token_id"] = tokenizer.pad_token_id

        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                **generation_params,
            )

        # The output sequence begins with the prompt tokens. Decode only the
        # newly generated tail instead of searching the decoded string for
        # the "Rewritten text:" marker, which misfires if the input text
        # contains the marker or the prompt was truncated.
        prompt_length = inputs.input_ids.shape[1]
        rewritten = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        return rewritten if rewritten else text

    except Exception as e:
        return f"Error rewriting text: {str(e)}"
114
+
115
def generate_audio_gtts(text, language='en'):
    """Synthesize ``text`` to an MP3 file with Google Text-to-Speech.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS.

    Returns:
        Path to a newly created temporary ``.mp3`` file, or ``None`` when
        ``text`` is empty/blank or synthesis fails. The caller owns the
        returned file.
    """
    import logging

    # Nothing to speak: skip the request entirely.
    if not text or not str(text).strip():
        return None

    tmp_path = None
    try:
        tts = gTTS(text=text, lang=language, slow=False)

        # Reserve a unique file name, then close the handle before gTTS
        # writes to it; reopening a still-open temporary file by name is not
        # portable across platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
            tmp_path = tmp_file.name

        tts.save(tmp_path)
        return tmp_path

    except Exception:
        # Best-effort contract is kept (None on failure), but the cause is
        # logged instead of being silently discarded.
        logging.getLogger(__name__).exception("gTTS audio generation failed")
        if tmp_path is not None:
            # delete=False means nobody else cleans this file up.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        return None
127
+
128
def _read_uploaded_text(uploaded_file):
    """Return the UTF-8 text of an upload given as bytes, a path, or a file object."""
    if isinstance(uploaded_file, (bytes, bytearray)):
        # gr.File(type="binary") hands the handler the raw bytes.
        return bytes(uploaded_file).decode('utf-8')
    if isinstance(uploaded_file, str):
        # A string is treated as a path to the uploaded file.
        with open(uploaded_file, 'r', encoding='utf-8') as fh:
            return fh.read()
    content = uploaded_file.read()
    if isinstance(content, bytes):
        return content.decode('utf-8')
    return str(content)


def process_audiobook(input_text, uploaded_file, tone, model_choice):
    """Turn pasted or uploaded text into a rewritten passage and an MP3.

    Args:
        input_text: Text pasted into the UI; used when no file is uploaded.
        uploaded_file: Uploaded file content (bytes, path, or file-like
            object), or ``None``. Takes precedence over ``input_text``.
        tone: Key into ``TONE_PROMPTS`` for the rewrite.
        model_choice: Unused here; accepted because the click handler passes
            it.

    Returns:
        A 4-tuple ``(status, original_text, rewritten_text, audio_path)``
        matching the UI outputs. ``audio_path`` is a file path on success and
        ``None`` otherwise; every message is carried in ``status`` so that
        the audio output never receives a non-path string.
    """
    if not model_loaded:
        return (
            "❌ Please load the AI model first! Please click 'Load Model' button first.",
            None,
            None,
            None,
        )

    # Determine input text (an uploaded file wins over pasted text).
    if uploaded_file is not None:
        try:
            text_to_process = _read_uploaded_text(uploaded_file)
        except Exception as e:
            return f"Error reading file: {str(e)}", None, None, None
    elif input_text:
        text_to_process = input_text
    else:
        text_to_process = ""

    if not text_to_process or not text_to_process.strip():
        return "Please provide text input or upload a file.", None, None, None

    # Truncate if too long
    if len(text_to_process) > 2000:
        text_to_process = text_to_process[:2000]
        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
    else:
        status_msg = f"✅ Processing {len(text_to_process)} characters."

    # Rewrite text with AI
    try:
        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
    except Exception as e:
        return f"Error in text rewriting: {str(e)}", None, None, None

    # rewrite_text_with_granite reports failures as a string with this
    # prefix; do not narrate an error message as if it were the audiobook.
    if isinstance(rewritten_text, str) and rewritten_text.startswith("Error rewriting text:"):
        return f"{status_msg} ❌ {rewritten_text}", text_to_process, None, None

    # Generate audio
    try:
        audio_file_path = generate_audio_gtts(rewritten_text)
    except Exception as e:
        return (
            f"{status_msg} Error generating audio: {str(e)}",
            text_to_process,
            rewritten_text,
            None,
        )

    if audio_file_path is None:
        return (
            f"{status_msg} ❌ Failed to generate audio.",
            text_to_process,
            rewritten_text,
            None,
        )

    return status_msg, text_to_process, rewritten_text, audio_file_path
185
+
186
def get_model_status():
    """Return a one-line, human-readable description of the model state.

    Returns:
        ``"❌ Model not loaded"`` when no model is loaded; otherwise a
        message naming "GPU" or "CPU" depending on whether CUDA is available.
    """
    if not model_loaded:
        return "❌ Model not loaded"

    if torch.cuda.is_available():
        device_label = "GPU"
    else:
        device_label = "CPU"
    return f"✅ Model loaded on {device_label}"
194
+
195
+ # Create Gradio interface
196
def create_interface():
    """Build the EchoVerse Gradio Blocks app and wire up its event handlers.

    Returns:
        The constructed ``gr.Blocks`` instance; launching is left to the
        caller.
    """
    # NOTE(review): the source this was recovered from had no indentation;
    # component nesting and whitespace inside the HTML/CSS strings are
    # reconstructed — confirm against the intended layout.
    page_css = """
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .main-header {
        text-align: center;
        color: #2E86AB;
        margin-bottom: 20px;
    }
    .status-box {
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
    """

    header_html = """
    <div class="main-header">
        <h1>��� EchoVerse Local</h1>
        <h3>Transform Text into Expressive Audiobooks with Local AI</h3>
        <p><i>Powered by IBM Granite 3B - No internet required for AI processing!</i></p>
    </div>
    """

    # Evaluated once while the page is being built.
    gpu_label = '✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'
    system_info_html = f"""
    <div>
        <p><strong>GPU Available:</strong> {gpu_label}</p>
        <p><strong>TTS Engine:</strong> {TTS_CONFIG['engine']}</p>
    </div>

    <h3>��� Tips</h3>
    <ul>
        <li>First model load takes time</li>
        <li>3B model: ~6GB RAM needed</li>
        <li>8B model: ~16GB RAM needed</li>
        <li>GPU greatly speeds up processing</li>
        <li>gTTS requires internet connection</li>
    </ul>
    """

    with gr.Blocks(
        title="EchoVerse - Local AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css=page_css,
    ) as demo:

        # --- Header -------------------------------------------------------
        gr.HTML(header_html)

        # --- Model setup: pick a checkpoint and load it -------------------
        with gr.Group():
            gr.HTML("<h2>��� AI Model Setup</h2>")

            with gr.Row():
                model_dropdown = gr.Dropdown(
                    choices=list(MODEL_CONFIG["models"].keys()),
                    value="granite-3b",
                    label="Choose Granite Model",
                    info="3B model is recommended for most computers. 8B requires more RAM.",
                )
                load_button = gr.Button("Load Model", variant="primary")

            model_status_box = gr.Textbox(
                label="Model Status",
                value="❌ Model not loaded",
                interactive=False,
            )

        # --- Input: an uploaded .txt file or pasted text ------------------
        with gr.Group():
            gr.HTML("<h2>��� Input Your Content</h2>")

            file_input = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary",
            )

            text_input = gr.Textbox(
                label="Or paste your text here:",
                lines=8,
                placeholder="Enter the text you want to convert to an audiobook...",
                max_lines=15,
            )

        # --- Configuration: rewrite tone ----------------------------------
        with gr.Group():
            gr.HTML("<h2>⚙️ Audio Configuration</h2>")

            with gr.Row():
                tone_dropdown = gr.Dropdown(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select Tone",
                    info="Choose how you want the text to be rewritten",
                )

        # --- Generate button ----------------------------------------------
        generate_button = gr.Button("��� Generate Audiobook", variant="primary", size="lg")

        # --- Results: status, before/after text, audio player -------------
        with gr.Group():
            gr.HTML("<h2>��� Results</h2>")

            status_box = gr.Textbox(
                label="Status",
                interactive=False,
            )

            with gr.Row():
                original_box = gr.Textbox(
                    label="Original Text",
                    lines=10,
                    interactive=False,
                )
                rewritten_box = gr.Textbox(
                    label="Rewritten Text",
                    lines=10,
                    interactive=False,
                )

            gr.HTML("<h2>��� Your Audiobook</h2>")
            audio_player = gr.Audio(
                label="Generated Audiobook",
                type="filepath",
            )

        # --- System info and usage tips -----------------------------------
        with gr.Group():
            gr.HTML("<h2>��� System Info</h2>")
            gr.HTML(system_info_html)

        # --- Event wiring -------------------------------------------------
        # Load button: load the selected model and show the resulting status.
        load_button.click(
            fn=load_granite_model,
            inputs=[model_dropdown],
            outputs=[model_status_box],
        )

        # Generate button: run the full text -> rewrite -> audio pipeline.
        generate_button.click(
            fn=process_audiobook,
            inputs=[text_input, file_input, tone_dropdown, model_dropdown],
            outputs=[status_box, original_box, rewritten_box, audio_player],
        )

    return demo
341
+
342
+ # Launch the app
343
if __name__ == "__main__":
    # Serve on all network interfaces at port 7860 without creating a public
    # Gradio share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)