VSPAN committed on
Commit
b8dff41
·
verified ·
1 Parent(s): d645fd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -139
app.py CHANGED
@@ -3,159 +3,147 @@ import edge_tts
3
  import asyncio
4
  import tempfile
5
  import re
6
- from typing import Optional, Tuple, Dict
 
 
 
7
  from huggingface_hub import InferenceClient
 
 
8
 
9
- # Константы
10
- DEFAULT_RATE = 0
11
- DEFAULT_PITCH = 0
12
- HF_API_KEY = "YOUR_API_KEY" # Замените на ваш API ключ
13
 
14
- # Инициализация клиента для генерации текста
15
- text_client = InferenceClient(provider="together", api_key=HF_API_KEY)
 
 
 
16
 
17
- # Кастомная цветовая схема
18
- theme = gr.themes.Default(
19
- primary_hue="orange",
20
- secondary_hue="yellow",
21
- ).set(
22
- button_primary_background="linear-gradient(90deg, #ff9a00, #ffd700)",
23
- button_primary_background_hover="linear-gradient(90deg, #ff8c00, #ffcc00)",
24
- slider_color="#ff9a00",
25
- block_background="#fff5e6"
26
- )
27
-
28
- async def get_voices() -> Dict[str, str]:
29
  voices = await edge_tts.list_voices()
30
- return {
31
- f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
32
- for v in voices
33
- }
34
 
35
- async def text_to_speech(text: str, voice: str, rate: int, pitch: int) -> Tuple[Optional[str], Optional[str]]:
 
36
  if not text.strip():
37
- return None, None
38
- try:
39
- voice_short_name = voice.split(" - ")[0]
40
- communicate = edge_tts.Communicate(
41
- text,
42
- voice_short_name,
43
- rate=f"{rate:+d}%",
44
- pitch=f"{pitch:+d}Hz"
45
- )
46
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
47
- await communicate.save(tmp_file.name)
48
- return tmp_file.name, None
49
- except Exception:
50
- return None, None
51
-
52
- # ADD PROMPT HERE - Вставьте ваш промпт для генерации текста
53
- SYSTEM_PROMPT = """
54
- [Здесь будет ваш системный промпт]
55
- """
56
 
57
- async def generate_detailed_description(input_text: str) -> str:
58
- try:
59
- # Добавьте вашу логику обработки промпта
60
- messages = [
61
- {"role": "system", "content": SYSTEM_PROMPT},
62
- {"role": "user", "content": input_text}
63
- ]
64
-
65
- completion = text_client.chat.completions.create(
66
- model="deepseek-ai/DeepSeek-R1",
67
- messages=messages,
68
- max_tokens=500
69
- )
70
- return completion.choices[0].message.content
71
- except Exception:
72
- return ""
 
 
 
 
 
 
 
 
 
 
73
 
74
- async def tts_interface(text: str, voice: str, rate: int, pitch: int):
75
- return await text_to_speech(text, voice, rate, pitch)
 
 
 
 
 
 
 
 
 
76
 
77
- async def create_demo():
 
78
  voices = await get_voices()
79
 
80
- with gr.Blocks(theme=theme, title="AI Studio Pro") as demo:
81
- gr.Markdown("# 🧡💛 AI Creative Studio")
82
-
83
- # Вкладки
84
- with gr.Tabs():
85
- # Вкладка TTS
86
- with gr.Tab("🔊 Генерация речи"):
87
- with gr.Row():
88
- with gr.Column():
89
- text_input = gr.Textbox(
90
- label="Входной текст",
91
- lines=5,
92
- placeholder="Введите текст для озвучки...",
93
- elem_classes="orange-border"
94
- )
95
- with gr.Row():
96
- lang_dropdown = gr.Dropdown(
97
- choices=["Все языки", "en", "ru", "es", "fr", "de"],
98
- value="Все языки",
99
- label="Выберите язык"
100
- )
101
- voice_dropdown = gr.Dropdown(
102
- choices=list(voices.keys()),
103
- label="Выберите голос",
104
- interactive=True
105
- )
106
- with gr.Row():
107
- rate_slider = gr.Slider(-50, 50, 0, label="Скорость")
108
- pitch_slider = gr.Slider(-20, 20, 0, label="Тон")
109
- generate_btn = gr.Button("Сгенерировать речь", variant="primary")
110
-
111
- with gr.Column():
112
- audio_output = gr.Audio(label="Результат", elem_classes="orange-border")
113
-
114
- # Вкладка генерации контента
115
- with gr.Tab("✨ Генератор контента"):
116
- with gr.Row():
117
- with gr.Column():
118
- prompt_input = gr.Textbox(
119
- label="Краткое описание",
120
- lines=3,
121
- placeholder="Опишите идею для генерации...",
122
- elem_classes="orange-border"
123
- )
124
- with gr.Accordion("Дополнительные настройки", open=False):
125
- gr.Markdown("Здесь будут дополнительные параметры")
126
- generate_content_btn = gr.Button("Сгенерировать контент", variant="primary")
127
-
128
- with gr.Column():
129
- text_output = gr.Textbox(
130
- label="Сгенерированный текст",
131
- interactive=False,
132
- elem_classes="orange-border"
133
- )
134
- image_output = gr.Gallery(
135
- label="Сгенерированные изображения",
136
- columns=2,
137
- elem_classes="orange-border"
138
- )
139
 
140
- # Обработчики событий
141
- generate_btn.click(
142
- tts_interface,
143
- [text_input, voice_dropdown, rate_slider, pitch_slider],
144
- audio_output
145
- )
146
-
147
- generate_content_btn.click(
148
- generate_detailed_description,
149
- prompt_input,
150
- text_output
151
- )
 
 
 
 
 
 
 
 
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  return demo
154
 
155
- async def main():
156
- demo = await create_demo()
157
- demo.queue()
158
- demo.launch()
159
-
160
  if __name__ == "__main__":
161
- asyncio.run(main())
 
 
 
 
 
3
  import asyncio
4
  import tempfile
5
  import re
6
+ import emoji
7
+ import requests
8
+ from PIL import Image
9
+ from io import BytesIO
10
  from huggingface_hub import InferenceClient
11
+ import os
12
+ from dotenv import load_dotenv
13
 
14
+ # Загрузка переменных окружения из .env файла
15
+ load_dotenv()
 
 
16
 
17
# Strip characters that TTS would read aloud awkwardly before synthesis.
def clean_text(text):
    """Remove Markdown-style markup characters and emoji from *text*."""
    without_markup = re.sub(r'[*_~><]', '', text)
    return emoji.replace_emoji(without_markup, replace='')
22
 
23
# Get all available voices
async def get_voices():
    """Return a mapping of display labels to edge-tts voice short names."""
    catalog = await edge_tts.list_voices()
    labels = {}
    for entry in catalog:
        display = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labels[display] = entry['ShortName']
    return labels
 
 
 
27
 
28
# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """Convert *text* to an MP3 file via edge-tts.

    Args:
        text: Text to synthesize; markup/emoji are stripped first.
        voice: Display string "ShortName - Locale (Gender)" from get_voices().
        rate: Speech rate adjustment in percent.
        pitch: Pitch adjustment in Hz.

    Returns:
        (path_to_mp3, None) on success, or (None, warning_message) on failure.
    """
    if not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    text = clean_text(text)
    voice_short_name = voice.split(" - ")[0]
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except Exception as e:
        # Fixed: the empty temp file used to leak when synthesis failed.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return None, f"An error occurred during text-to-speech conversion: {str(e)}"
    return tmp_path, None
 
 
 
47
 
48
# Generate image using your custom prompt and API
def generate_image_with_prompt(prompt, api_key):
    """Ask the model for an image URL for *prompt* and download it.

    Args:
        prompt: Text description of the desired image.
        api_key: Hugging Face API key for the inference call.

    Returns:
        A PIL Image on success, or None when the reply contains no URL.

    NOTE(review): "deepseek-ai/DeepSeek-R1" is a chat model; this code
    assumes its reply is a plain image URL — confirm against the deployment.
    """
    client = InferenceClient(
        provider="together",
        api_key=api_key
    )
    completion = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500
    )
    response = completion.choices[0].message.content
    # A URL anywhere in the reply is treated as a downloadable image.
    if "http" not in response:
        # Fixed: this branch used to return a (None, str) tuple while the
        # success path returned a bare Image; the caller feeds the result
        # straight into a gr.Image output, so failures must yield None.
        return None
    image_response = requests.get(response)
    image_response.raise_for_status()  # surface HTTP failures instead of a cryptic PIL error
    return Image.open(BytesIO(image_response.content))
74
 
75
# Gradio interface function
def tts_interface(*args):
    """Gradio callback: run TTS, then (only on success) image generation.

    Expects args = (text, voice, rate, pitch, image_prompt, api_key), in the
    same order as the Interface inputs.

    Returns:
        (audio_path, warning-Markdown update, image) matching the outputs.
    """
    # asyncio.run creates and ALWAYS closes its own event loop; the old
    # manual new_event_loop()/close() pair leaked the loop when
    # text_to_speech raised before close() was reached.
    audio, warning = asyncio.run(text_to_speech(*args[:-2]))
    if warning:
        # Fixed: the old code generated (and then discarded) the image even
        # when TTS validation failed — a wasted paid API call.
        return (None, gr.update(value=f"<span style='color:red;'>{warning}</span>", visible=True), None)
    image = generate_image_with_prompt(args[-2], args[-1])
    return (audio, gr.update(visible=False), image)
86
 
87
# Create Gradio application
async def create_demo(api_key):
    """Build the Gradio Interface wiring TTS and image generation together.

    Args:
        api_key: Default Hugging Face API key pre-filled into the password box.

    Returns:
        A configured (not yet launched) gr.Interface.
    """
    voices = await get_voices()

    description = """
    This is a simple text-to-speech and image generation application.
    Enter any text to convert it into speech and describe an image to generate it.
    Adjust the speech rate and pitch according to your preference.
    """

    css = """
    .gradio-container {
        background: linear-gradient(135deg, #e0eafc, #cfdef3);
        color: #333;
    }
    .gr-button {
        background-color: #007aff;
        color: white;
    }
    .gr-button:hover {
        background-color: #005bb5;
    }
    .gr-textbox, .gr-slider, .gr-dropdown {
        border-radius: 8px;
        padding: 10px;
        margin: 10px 0;
    }
    .gr-markdown {
        color: red;
    }
    """

    # Input order must match tts_interface's positional unpacking:
    # (text, voice, rate, pitch, image_prompt, api_key).
    input_widgets = [
        gr.Textbox(label="Input Text", lines=5),
        gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
        gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
        gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        gr.Textbox(label="Image Prompt", lines=2),
        gr.Textbox(label="Hugging Face API Key", value=api_key, type="password"),
    ]
    output_widgets = [
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Markdown(label="Warning", visible=False),
        gr.Image(label="Generated Image"),
    ]

    demo = gr.Interface(
        fn=tts_interface,
        inputs=input_widgets,
        outputs=output_widgets,
        title="Edge TTS Text-to-Speech & Image Generation",
        description=description,
        css=css,
        article="",
        analytics_enabled=False,
        allow_flagging="manual",
    )
    return demo
142
 
143
# Run the application
if __name__ == "__main__":
    # Key comes from the environment; load_dotenv() above also allows a .env file.
    hf_api_key = os.getenv('HUGGING_FACE_API_KEY')
    if not hf_api_key:
        raise ValueError("Please set the HUGGING_FACE_API_KEY environment variable.")
    app = asyncio.run(create_demo(hf_api_key))
    app.launch()