VSPAN committed on
Commit 65c0339 · verified · 1 parent: b8dff41

Update app.py

Files changed (1)
  1. app.py +15 -78
app.py CHANGED
@@ -4,19 +4,12 @@ import asyncio
 import tempfile
 import re
 import emoji
-import requests
-from PIL import Image
-from io import BytesIO
-from huggingface_hub import InferenceClient
-import os
-from dotenv import load_dotenv
-
-# Load environment variables from the .env file
-load_dotenv()
 
 # Function to clean the text of unwanted characters and emoji
 def clean_text(text):
+    # Remove the specified characters
     text = re.sub(r'[*_~><]', '', text)
+    # Remove emoji
     text = emoji.replace_emoji(text, replace='')
     return text
 
@@ -32,7 +25,9 @@ async def text_to_speech(text, voice, rate, pitch):
     if not voice:
         return None, "Please select a voice."
 
+    # Clean the text
     text = clean_text(text)
+
     voice_short_name = voice.split(" - ")[0]
     rate_str = f"{rate:+d}%"
     pitch_str = f"{pitch:+d}Hz"
@@ -45,95 +40,40 @@ async def text_to_speech(text, voice, rate, pitch):
         return None, f"An error occurred during text-to-speech conversion: {str(e)}"
     return tmp_path, None
 
-# Generate image using your custom prompt and API
-def generate_image_with_prompt(prompt, api_key):
-    client = InferenceClient(
-        provider="together",
-        api_key=api_key
-    )
-    messages = [
-        {
-            "role": "user",
-            "content": prompt
-        }
-    ]
-    completion = client.chat.completions.create(
-        model="deepseek-ai/DeepSeek-R1",
-        messages=messages,
-        max_tokens=500
-    )
-    response = completion.choices[0].message.content
-    # Assuming the response contains a URL or image data
-    if "http" in response:
-        image_url = response
-        image_response = requests.get(image_url)
-        image = Image.open(BytesIO(image_response.content))
-        return image
-    else:
-        return None, "Failed to generate image."
-
 # Gradio interface function
 def tts_interface(*args):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
-    audio, warning = loop.run_until_complete(text_to_speech(*args[:-2]))
-    prompt = args[-2]
-    image = generate_image_with_prompt(prompt, args[-1])
+    audio, warning = loop.run_until_complete(text_to_speech(*args))
     loop.close()
     if warning:
-        return (None, gr.update(value=f"<span style='color:red;'>{warning}</span>", visible=True), None)
-    return (audio, gr.update(visible=False), image)
+        return None, gr.update(value=f"<span style='color:red;'>{warning}</span>", visible=True)
+    return audio, gr.update(visible=False)
 
 # Create Gradio application
-async def create_demo(api_key):
+async def create_demo():
     voices = await get_voices()
 
     description = """
-    This is a simple text-to-speech and image generation application.
-    Enter any text to convert it into speech and describe an image to generate it.
+    This is a simple text-to-speech application using Microsoft Edge TTS.
+    You can enter any text and select the voice you want to use.
     Adjust the speech rate and pitch according to your preference.
     """
 
-    css = """
-    .gradio-container {
-        background: linear-gradient(135deg, #e0eafc, #cfdef3);
-        color: #333;
-    }
-    .gr-button {
-        background-color: #007aff;
-        color: white;
-    }
-    .gr-button:hover {
-        background-color: #005bb5;
-    }
-    .gr-textbox, .gr-slider, .gr-dropdown {
-        border-radius: 8px;
-        padding: 10px;
-        margin: 10px 0;
-    }
-    .gr-markdown {
-        color: red;
-    }
-    """
-
     demo = gr.Interface(
         fn=tts_interface,
         inputs=[
             gr.Textbox(label="Input Text", lines=5),
             gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
             gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
-            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
-            gr.Textbox(label="Image Prompt", lines=2),
-            gr.Textbox(label="Hugging Face API Key", value=api_key, type="password")
+            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
         ],
         outputs=[
             gr.Audio(label="Generated Audio", type="filepath"),
-            gr.Markdown(label="Warning", visible=False),
-            gr.Image(label="Generated Image")
+            gr.Markdown(label="Warning", visible=False)
         ],
-        title="Edge TTS Text-to-Speech & Image Generation",
+        title="Edge TTS Text-to-Speech",
         description=description,
-        css=css,
         article="",
         analytics_enabled=False,
         allow_flagging="manual"
@@ -142,8 +82,5 @@ async def create_demo(api_key):
 
 # Run the application
 if __name__ == "__main__":
-    api_key = os.getenv('HUGGING_FACE_API_KEY')  # Your API key
-    if not api_key:
-        raise ValueError("Please set the HUGGING_FACE_API_KEY environment variable.")
-    demo = asyncio.run(create_demo(api_key))
-    demo.launch()
+    demo = asyncio.run(create_demo())
+    demo.launch()
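
The hunks above show only the edges of get_voices() and of the text_to_speech body; those parts are untouched by this commit and therefore not displayed. For orientation, here is a minimal sketch of how the elided pieces are typically written with the edge-tts package. The voice-label format, the temporary-file handling, and the exact Communicate keyword arguments are assumptions, not code taken from this repository.

import re
import tempfile
import emoji
import edge_tts

# clean_text as shown in the first hunk above
def clean_text(text):
    text = re.sub(r'[*_~><]', '', text)
    text = emoji.replace_emoji(text, replace='')
    return text

async def get_voices():
    # Assumed label format "ShortName - FriendlyName"; tts_interface later
    # recovers the short name with voice.split(" - ")[0], as in the diff.
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['FriendlyName']}": v["ShortName"] for v in voices}

async def text_to_speech(text, voice, rate, pitch):
    if not voice:
        return None, "Please select a voice."

    # Clean the text
    text = clean_text(text)

    voice_short_name = voice.split(" - ")[0]
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    try:
        # Recent edge-tts releases accept rate and pitch as signed strings.
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            tmp_path = tmp.name
        await communicate.save(tmp_path)
    except Exception as e:
        return None, f"An error occurred during text-to-speech conversion: {str(e)}"
    return tmp_path, None

Together with the gr.Interface definition and the __main__ block in the last two hunks, this is essentially the whole of the simplified, TTS-only app.py after this commit.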