File size: 7,612 Bytes
1d000a8
 
 
 
 
 
 
f3b050a
 
 
5145c3f
 
 
 
1d000a8
f3b050a
 
 
4b5ced5
1d000a8
5145c3f
 
 
f3b050a
1d000a8
 
91cb011
5145c3f
 
 
1d000a8
5924dfb
 
 
1d000a8
 
 
5145c3f
1d000a8
5145c3f
 
1d000a8
 
 
 
5145c3f
1d000a8
 
 
 
ff61c28
5145c3f
1d000a8
 
 
 
 
ff61c28
1d000a8
 
 
f3b050a
 
 
 
4aade4a
 
 
 
 
 
 
 
f3b050a
 
 
 
4aade4a
 
 
1d000a8
4b5ced5
1d000a8
5145c3f
1d000a8
 
 
 
5145c3f
4aade4a
 
1d000a8
f3b050a
 
 
1d000a8
 
 
bc96c8f
 
 
 
 
 
 
 
 
 
 
4b5ced5
f3b050a
ffecdc4
f3b050a
 
 
bc96c8f
ffecdc4
bc96c8f
f3b050a
 
ffecdc4
f3b050a
 
bcb395d
4b5ced5
1d000a8
4b5ced5
1d000a8
4b5ced5
 
ff61c28
4b5ced5
ff61c28
bcb395d
5145c3f
1d000a8
c9496c6
 
4aade4a
1d000a8
4aade4a
1d000a8
4b5ced5
1d000a8
13a2732
ff61c28
bcb395d
5145c3f
8a961ed
 
1d000a8
8a961ed
1d000a8
4b5ced5
1d000a8
13a2732
c9496c6
bcb395d
5145c3f
8a961ed
 
1d000a8
8a961ed
5145c3f
84098ad
9a178d1
 
 
 
 
84098ad
bc96c8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import librosa
import librosa.display
import gradio as gr
import soundfile as sf
import os
import logging
import tempfile

# Constants
# TrueType font used to render the text. text_to_spectrogram_image falls back
# to Pillow's built-in default font when this file cannot be loaded.
DEFAULT_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
# Sample rate used when writing generated audio and when plotting its spectrogram.
DEFAULT_SAMPLE_RATE = 22050

# Setup logging
# Configure the root logger at INFO so the logging.info(...) calls in the
# Gradio handlers are emitted.
logging.basicConfig(level=logging.INFO)

# Function for creating a spectrogram image with text
def text_to_spectrogram_image(text, base_width=512, height=256, max_font_size=80, margin=10, letter_spacing=5):
    """Render *text* as a white-on-black image meant to be used as a spectrogram.

    Characters are drawn one at a time so that ``letter_spacing`` pixels can be
    inserted between neighbouring glyphs, and the whole run is centred on the
    canvas. The canvas grows beyond ``base_width`` x ``height`` when the text
    plus margins would not fit.

    Args:
        text: String to draw. ``None`` or an empty string yields an all-black
            canvas instead of raising.
        base_width: Minimum canvas width in pixels.
        height: Minimum canvas height in pixels.
        max_font_size: Size passed to ``ImageFont.truetype`` (ignored when the
            default-font fallback is used).
        margin: Padding in pixels reserved on each side when the canvas has to
            grow to fit the text.
        letter_spacing: Extra pixels inserted between consecutive characters.

    Returns:
        A 2-D NumPy array (rows x columns) in which every non-zero pixel has
        been set to 255.
    """
    text = text or ""

    try:
        font = ImageFont.truetype(DEFAULT_FONT_PATH, max_font_size)
    except IOError:
        logging.warning(f"Font not found at {DEFAULT_FONT_PATH}. Using default font.")
        font = ImageFont.load_default()

    # A throwaway 1x1 canvas is enough to measure glyphs.
    probe = ImageDraw.Draw(Image.new('L', (1, 1)))

    # Measure every character exactly once; the same extents drive both the
    # canvas sizing and the pen movement while drawing.
    boxes = [probe.textbbox((0, 0), char, font=font) for char in text]
    glyph_widths = [right - left for left, _top, right, _bottom in boxes]

    if boxes:
        text_width = sum(glyph_widths) + letter_spacing * (len(glyph_widths) - 1)
        # Use the tallest glyph rather than only the first character, so the
        # canvas sizing and vertical centring do not depend on which
        # character the text happens to start with.
        text_height = max(bottom - top for _left, top, _right, bottom in boxes)
    else:
        text_width = 0
        text_height = 0

    # Adjust width and height based on text size. Image.new needs integer
    # dimensions, while measurements and UI values may arrive as floats.
    width = int(max(base_width, text_width + margin * 2))
    height = int(max(height, text_height + margin * 2))

    image = Image.new('L', (width, height), 'black')
    draw = ImageDraw.Draw(image)

    text_x = (width - text_width) // 2
    text_y = (height - text_height) // 2

    for char, glyph_width in zip(text, glyph_widths):
        draw.text((text_x, text_y), char, font=font, fill='white')
        text_x += glyph_width + letter_spacing

    # Binarise: any anti-aliased (non-zero) pixel becomes full white.
    pixels = np.array(image)
    return np.where(pixels > 0, 255, pixels)

# Converting an image to audio
def spectrogram_image_to_audio(image, sr=DEFAULT_SAMPLE_RATE):
    """Reconstruct a waveform from an image treated as a magnitude spectrogram.

    Args:
        image: Array of pixel intensities; presumably 2-D grayscale in the
            0-255 range with row 0 at the top -- TODO confirm against callers.
        sr: Accepted for interface compatibility; the body does not use it.

    Returns:
        Whatever ``librosa.griffinlim`` returns for the scaled spectrogram.
    """
    # Flip top-to-bottom so the bottom row of the picture becomes row 0 of
    # the matrix handed to Griffin-Lim.
    upright = np.flipud(image).astype(np.float32)
    # Rescale pixel values by 100/255 before phase reconstruction.
    magnitude = upright / 255.0 * 100.0
    return librosa.griffinlim(magnitude)

# Function for creating an audio file and spectrogram from text
def create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing):
    """Encode *text* into audio and render the mel spectrogram of that audio.

    The text is drawn with ``text_to_spectrogram_image``, converted to a
    waveform with ``spectrogram_image_to_audio``, written to a temporary WAV
    file at ``DEFAULT_SAMPLE_RATE``, and finally plotted to a temporary PNG.

    Args:
        text: Text to hide in the spectrogram.
        base_width: Minimum image width, forwarded to the renderer.
        height: Minimum image height, forwarded to the renderer.
        max_font_size: Font size, forwarded to the renderer.
        margin: Padding around the text, forwarded to the renderer.
        letter_spacing: Gap between characters, forwarded to the renderer.

    Returns:
        ``(audio_path, spectrogram_path)``: paths of the WAV and PNG files.
        Both are created with ``delete=False`` and are not removed here.
    """
    spec_image = text_to_spectrogram_image(text, base_width, height, max_font_size, margin, letter_spacing)
    y = spectrogram_image_to_audio(spec_image)

    # Reserve a file name, release our own handle, then let soundfile write it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
        audio_path = temp_audio.name
    sf.write(audio_path, y, DEFAULT_SAMPLE_RATE)

    # Create spectrogram from audio
    S = librosa.feature.melspectrogram(y=y, sr=DEFAULT_SAMPLE_RATE)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Work on an explicit Figure/Axes pair instead of pyplot's implicit
    # "current figure", and always close it: otherwise a failure while
    # plotting or saving would leave the figure registered in pyplot for the
    # lifetime of the server process.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(1, 1, 1)
        librosa.display.specshow(S_dB, sr=DEFAULT_SAMPLE_RATE, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        fig.tight_layout(pad=0)

        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_spectrogram:
            spectrogram_path = temp_spectrogram.name
        fig.savefig(spectrogram_path, bbox_inches='tight', pad_inches=0, transparent=True)
    finally:
        plt.close(fig)

    return audio_path, spectrogram_path

# Function for displaying the spectrogram of an audio file
def display_audio_spectrogram(audio_path):
    """Plot the mel spectrogram of an audio file and save it as a PNG.

    Args:
        audio_path: Path of the audio file to analyse; passed to
            ``librosa.load`` with ``sr=None``.

    Returns:
        Path of a temporary PNG file containing the axis-less spectrogram.
        The file is created with ``delete=False`` and is not removed here.
    """
    y, sr = librosa.load(audio_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Work on an explicit Figure/Axes pair instead of pyplot's implicit
    # "current figure", and always close it: otherwise a failure while
    # plotting or saving would leave the figure registered in pyplot for the
    # lifetime of the server process.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(1, 1, 1)
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        fig.tight_layout(pad=0)

        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_spectrogram:
            spectrogram_path = temp_spectrogram.name
        fig.savefig(spectrogram_path, bbox_inches='tight', pad_inches=0, transparent=True)
    finally:
        plt.close(fig)

    return spectrogram_path

# Converting a downloaded image to an audio spectrogram
def image_to_spectrogram_audio(image_path, sr=DEFAULT_SAMPLE_RATE):
    """Convert an image file into a WAV file whose spectrogram shows the image.

    Args:
        image_path: Path of the image to load; it is converted to 8-bit
            grayscale (mode ``'L'``) before synthesis.
        sr: Sample rate used when writing the WAV file.

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False`` and is not removed here.
    """
    # The context manager closes the underlying file deterministically, even
    # for formats Pillow keeps open after loading (e.g. multi-frame images).
    with Image.open(image_path) as source:
        image = np.array(source.convert('L'))
    y = spectrogram_image_to_audio(image, sr)

    # Reserve a file name, release our own handle, then let soundfile write it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
        img2audio_path = temp_audio.name
    sf.write(img2audio_path, y, sr)
    return img2audio_path

# Gradio interface
def gradio_interface_fn(text, base_width, height, max_font_size, margin, letter_spacing):
    """Gradio handler for the text tab: build audio and its spectrogram from text.

    Args:
        text: Contents of the text box.
        base_width: Minimum image width from the UI.
        height: Minimum image height from the UI.
        max_font_size: Font size from the UI.
        margin: Padding around the text from the UI.
        letter_spacing: Gap between characters from the UI.

    Returns:
        ``(audio_path, spectrogram_path)`` as produced by
        ``create_audio_with_spectrogram``.

    Raises:
        gr.Error: If no text was entered, so the UI shows a readable message
            instead of an internal IndexError from the renderer.
    """
    if not text:
        raise gr.Error("Please enter some text to encode.")

    logging.info(f"Generating audio and spectrogram for text:\n{text}\n")
    return create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing)

def gradio_image_to_audio_fn(upload_image):
    """Gradio handler for the image tab: turn an uploaded image into audio.

    Args:
        upload_image: File path of the uploaded image, or ``None`` when the
            button is pressed without an upload.

    Returns:
        Path of the generated WAV file from ``image_to_spectrogram_audio``.

    Raises:
        gr.Error: If no image was uploaded, so the UI shows a readable
            message instead of an opaque failure while opening ``None``.
    """
    if not upload_image:
        raise gr.Error("Please upload an image first.")

    logging.info(f"Converting image to audio:\n{upload_image}\n")
    return image_to_spectrogram_audio(upload_image)

def gradio_decode_fn(upload_audio):
    """Gradio handler for the audio tab: render the spectrogram of an upload.

    Args:
        upload_audio: File path of the uploaded audio, or ``None`` when the
            button is pressed without an upload.

    Returns:
        Path of the spectrogram PNG from ``display_audio_spectrogram``.

    Raises:
        gr.Error: If no audio was uploaded, so the UI shows a readable
            message instead of an opaque failure while loading ``None``.
    """
    if not upload_audio:
        raise gr.Error("Please upload an audio file first.")

    logging.info(f"Generating spectrogram for audio:\n{upload_audio}\n")
    return display_audio_spectrogram(upload_audio)

# Gradio UI: a single Blocks app (txt2spec) with four tabs. Components are
# laid out in the order they are created, so statement order matters here.
# The css argument hides the page footer.
with gr.Blocks(title='Audio Steganography', css="footer{display:none !important}", theme=gr.themes.Soft(primary_hue="green", secondary_hue="green", spacing_size="sm", radius_size="lg")) as txt2spec:
    # Tab 1: text -> audio + spectrogram preview.
    with gr.Tab("Text to Spectrogram"):
        with gr.Group():
            text = gr.Textbox(lines=2, placeholder="Enter your text:", label="Text")
            with gr.Row(variant='panel'):
                # Width/height sliders are hidden (visible=False) but are still
                # listed in the click inputs below with their values 512 / 256.
                base_width = gr.Slider(value=512, label="Image Width", visible=False)
                height = gr.Slider(value=256, label="Image Height", visible=False)
                max_font_size = gr.Slider(minimum=10, maximum=130, step=5, value=80, label="Font size")
                margin = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Indent")
                letter_spacing = gr.Slider(minimum=0, maximum=50, step=1, value=5, label="Letter spacing")
            generate_button = gr.Button("Generate", variant='primary', size="lg")

        with gr.Column(variant='panel'):
            with gr.Group():
                # type="filepath": the handler returns file paths for both outputs.
                output_audio = gr.Audio(type="filepath", label="Generated audio")
                output_spectrogram = gr.Image(type="filepath", label="Spectrogram")

        # Input order must match gradio_interface_fn's parameter order.
        generate_button.click(gradio_interface_fn, inputs=[text, base_width, height, max_font_size, margin, letter_spacing], outputs=[output_audio, output_spectrogram])

    # Tab 2: uploaded image -> audio.
    with gr.Tab("Image to Spectrogram"):
        with gr.Group():
            with gr.Column():
                upload_image = gr.Image(type="filepath", label="Upload image")
                convert_button = gr.Button("Convert to audio", variant='primary', size="lg")

        with gr.Column(variant='panel'):
            output_audio_from_image = gr.Audio(type="filepath", label="Generated audio")

        convert_button.click(gradio_image_to_audio_fn, inputs=[upload_image], outputs=[output_audio_from_image])

    # Tab 3: uploaded audio -> spectrogram image.
    with gr.Tab("Audio Spectrogram"):
        with gr.Group():
            with gr.Column():
                upload_audio = gr.Audio(type="filepath", label="Upload audio", scale=3)
                decode_button = gr.Button("Show spectrogram", variant='primary', size="lg")

        with gr.Column(variant='panel'):
            decoded_image = gr.Image(type="filepath", label="Audio Spectrogram")

        decode_button.click(gradio_decode_fn, inputs=[upload_audio], outputs=[decoded_image])

    # Tab 4: layout experiment.
    # NOTE(review): none of these components is connected to an event handler
    # in the code visible here -- confirm whether this tab is still needed.
    with gr.Tab("test"):
        with gr.Group():
            with gr.Row():
                generate_btn = gr.Button("Generate", variant="primary", scale=2)
                converted_voice = gr.Audio(label='Converted Voice', interactive=False, scale=9)
                output_format = gr.Dropdown(['mp3', 'flac', 'wav'], value='mp3', label='File Format', scale=1)

# Start the app at import/run time; share=True requests a public share link.
txt2spec.launch(share=True)