# Hugging Face Space (status at capture time: Running)
import json
import os
import re

import gradio as gr
import requests
from gradio_client import Client
from moviepy.audio.AudioClip import AudioClip
from moviepy.editor import VideoFileClip
# Pexels image search helper
def search_pexels_images(query):
    """Search Pexels for photos matching *query* and return their URLs.

    Args:
        query: Free-text search term. A missing or blank query short-circuits
            to an empty result without contacting the API.

    Returns:
        A list with the ``src.medium`` URL of every photo in the first result
        page (up to 80 requested). Empty when the response has no ``photos``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    if not query or not query.strip():
        return []

    # NOTE(security): a credential is committed in source here. It is kept as
    # a fallback only so existing deployments keep working; set the
    # PEXELS_API_KEY environment variable (and rotate the committed key).
    api_key = os.environ.get(
        "PEXELS_API_KEY",
        '5woz23MGx1QrSY0WHFb0BRi29JvbXPu97Hg0xnklYgHUI8G0w23FKH62',
    )

    # Passing the query through ``params`` lets requests URL-encode it, so
    # spaces, '&', '#', or non-ASCII search terms cannot corrupt the URL.
    response = requests.get(
        "https://api.pexels.com/v1/search",
        params={"query": query, "per_page": 80},
        headers={"Authorization": api_key},
        timeout=30,
    )
    response.raise_for_status()

    photos = response.json().get("photos", [])
    return [photo["src"]["medium"] for photo in photos]
# Callback that feeds Pexels search results to the gallery
def show_search_results(query):
    """Return the image URLs found for *query*.

    Thin pass-through to ``search_pexels_images``; the result is returned
    unchanged.
    """
    return search_pexels_images(query)
def extract_audio(video_in):
    """Extract the audio track of a video file into ``audio.wav``.

    Args:
        video_in: Path of the video file to read.

    Returns:
        The string ``'audio.wav'``: the output path, relative to the current
        working directory. The file is overwritten on every call.

    Raises:
        ValueError: If the video has no audio track.
    """
    output_audio = 'audio.wav'

    video_clip = VideoFileClip(video_in)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise ValueError(f"No audio track found in {video_in!r}.")
        # 44100 Hz is used as the sample rate of the written .wav file.
        audio_clip.write_audiofile(output_audio, fps=44100)
    finally:
        # Release the clip's underlying readers even when writing fails.
        video_clip.close()

    print("Audio extraction complete.")
    return output_audio
def get_caption_from_kosmos(image_in):
    """Caption an image with the hosted Kosmos-2 Space.

    Args:
        image_in: File path or URL of the image to describe.

    Returns:
        The description text, cut after its last period. When the text has no
        period at all it is returned untruncated. When the expected
        "Describe this image in detail:" prefix is absent, the whole
        reconstructed sentence is used as the description.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
    kosmos2_result = kosmos2_client.predict(
        image_in,    # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4,
    )
    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    # The second element of the result is opened as a JSON file; each entry is
    # indexed at [0] to get a text fragment.
    # NOTE(review): exact schema is defined by the remote Space — confirm.
    with open(kosmos2_result[1], 'r', encoding='utf-8') as f:
        data = json.load(f)

    full_sentence = ' '.join(sublist[0] for sublist in data)

    # Strip the echoed instruction ("Describe this image in detail:" followed
    # by optional whitespace) and keep the rest.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence)
    if match:
        description = match.group(1)
        print(description)
    else:
        print("Unable to locate valid description.")
        # Fall back to the raw text; leaving `description` unbound would
        # raise UnboundLocalError below.
        description = full_sentence

    # Cut at the last period, presumably to drop a trailing unfinished
    # sentence. Without any period, rfind() gives -1 and slicing would yield
    # an empty caption, so the text is kept as-is.
    last_period_index = description.rfind('.')
    if last_period_index == -1:
        truncated_caption = description
    else:
        truncated_caption = description[:last_period_index + 1]

    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    return truncated_caption
def get_caption(image_in):
    """Ask the hosted moondream1 Space for a one-sentence image description.

    Args:
        image_in: File path of the image to describe.

    Returns:
        Whatever the Space's ``/answer_question`` endpoint returns; it is
        printed and passed back unchanged.
    """
    question = "Describe precisely the image in one sentence."
    moondream = Client("https://vikhyatk-moondream1.hf.space/")
    answer = moondream.predict(
        image_in,   # filepath in 'image' Image component
        question,   # str in 'Question' Textbox component
        api_name="/answer_question",
    )
    print(answer)
    return answer
def get_magnet(prompt):
    """Generate audio for *prompt* with the hosted MAGNeT Space.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The second element of the ``/predict_full`` endpoint result.
    """
    text = f"{prompt}"
    print(text)

    # Positional arguments, in the order the Space's components expect them.
    request_args = (
        "facebook/audio-magnet-medium",  # 'Model' Radio component
        "",                              # 'Model Path (custom models)' Textbox
        text,                            # 'Input Text' Textbox
        3,                               # 'Temperature' Number
        0.9,                             # 'Top-p' Number
        10,                              # 'Max CFG coefficient' Number
        1,                               # 'Min CFG coefficient' Number
        20,                              # 'Decoding Steps (stage 1)' Number
        10,                              # 'Decoding Steps (stage 2)' Number
        10,                              # 'Decoding Steps (stage 3)' Number
        10,                              # 'Decoding Steps (stage 4)' Number
        "prod-stride1 (new!)",           # 'Span Scoring' Radio
    )

    magnet = Client("https://fffiloni-magnet.hf.space/")
    outputs = magnet.predict(*request_args, api_name="/predict_full")
    print(outputs)
    return outputs[1]
def get_audioldm(prompt):
    """Generate audio for *prompt* with the hosted AudioLDM-2 Space.

    The Space's result is handed to ``extract_audio``, which writes the audio
    track to a .wav file.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The path returned by ``extract_audio``.
    """
    request_args = (
        prompt,                 # 'Input text' Textbox
        "Low quality. Music.",  # 'Negative prompt' Textbox
        10,                     # 'Duration (seconds)' Slider (5 to 15)
        3.5,                    # 'Guidance scale' Slider (0 to 7)
        45,                     # 'Seed' Number
        3,                      # 'Number waveforms to generate' Slider (1 to 5)
    )

    audioldm = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    generated = audioldm.predict(*request_args, fn_index=1)
    print(generated)
    return extract_audio(generated)
def get_audiogen(prompt):
    """Generate audio for *prompt* with the hosted AudioGen Space.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The result of the Space's ``/infer`` endpoint, unchanged.
    """
    audiogen = Client("https://fffiloni-audiogen.hf.space/")
    # The second positional argument is the constant 10.
    # NOTE(review): presumably a duration in seconds — confirm against the Space.
    return audiogen.predict(prompt, 10, api_name="/infer")
def infer(image_in, chosen_model):
    """Caption an image, then turn the caption into audio.

    Args:
        image_in: File path of the input image.
        chosen_model: One of "MAGNet", "AudioLDM-2", or "AudioGen".

    Returns:
        The selected generator's result, or ``None`` when *chosen_model* is
        not one of the known names.
    """
    caption = get_caption(image_in)

    # Map each model label to the function that calls its backend.
    generators = {
        "MAGNet": get_magnet,
        "AudioLDM-2": get_audioldm,
        "AudioGen": get_audiogen,
    }
    generate = generators.get(chosen_model)
    if generate is None:
        return None
    return generate(caption)
# Stylesheet for the page: centers the main container and caps its width.
css="""
#col-container{
    margin: 0 auto;
    max-width: 800px;
}
"""

# Build the two-tab UI. The stylesheet is passed to Blocks and the wrapping
# column carries the matching element id; previously `css` was defined but
# never applied.
with gr.Blocks(css=css) as app:
    with gr.Column(elem_id="col-container"):
        with gr.Tabs():
            # Tab 1: upload an image, pick a backend, get generated audio.
            with gr.TabItem("Image to Audio"):
                with gr.Column():
                    gr.Markdown("### Image to Audio")
                    image_in = gr.Image(sources=["upload"], type="filepath", label="Image input")
                    chosen_model = gr.Radio(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen"], value="AudioLDM-2")
                    submit_btn = gr.Button("Submit")
                    audio_o = gr.Audio(label="Audio output")
                    submit_btn.click(
                        fn=infer,
                        inputs=[image_in, chosen_model],
                        outputs=audio_o,
                    )
            # Tab 2: free-text photo search shown in a gallery.
            with gr.TabItem("FREE Image Search"):
                with gr.Column():
                    gr.Markdown("### FREE Image Search")
                    search_query = gr.Textbox(label="사진 검색")
                    search_btn = gr.Button("검색")
                    images_output = gr.Gallery(label="검색 결과 이미지")
                    search_btn.click(
                        fn=show_search_results,
                        inputs=search_query,
                        outputs=images_output,
                    )

app.launch(debug=True)