import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
import json
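
# NOTE: the AST checkpoint below was fine-tuned on Speech Commands v2 (keyword
# spotting), not on an emotion dataset, so its labels are command words rather
# than emotions; a dedicated speech-emotion checkpoint could be swapped in here.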

emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
speech_recognizer = pipeline("automatic-speech-recognition",
                             model="kresnik/wav2vec2-large-xlsr-korean")

# Module-level defaults; the per-session copy lives in the gr.State created below.
current_stage = "intro"
session_data = {}


def create_interface():
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        state = gr.State(value={"stage": "intro", "session_data": {}})

        gr.Markdown("# 디지털 굿판")

        with gr.Tabs() as tabs:
            with gr.Tab("입장", id="intro"):
                gr.Markdown("""
                # 디지털 굿판에 오신 것을 환영합니다
                온천천의 디지털 치유 공간으로 들어가보세요.
                """)
                intro_next = gr.Button("여정 시작하기")

            with gr.Tab("청신", id="cleansing", visible=False):
                with gr.Row():
                    audio_player = gr.Audio(
                        value="path_to_default_sound.mp3",
                        type="filepath",
                        label="온천천의 소리"
                    )
                    location_info = gr.Textbox(
                        label="현재 위치",
                        value="온천장역",
                        interactive=False
                    )
                cleansing_next = gr.Button("다음 단계로")

            with gr.Tab("기원", id="voice", visible=False):
                with gr.Row():
                    voice_input = gr.Audio(
                        label="목소리로 전하기",
                        sources=["microphone", "upload"],
                        type="filepath"
                    )
                    with gr.Column():
                        emotion_output = gr.JSON(
                            label="감정 분석 결과",
                            visible=True
                        )
                        text_output = gr.Textbox(
                            label="음성 텍스트",
                            visible=True
                        )
                voice_next = gr.Button("다음 단계로")

            with gr.Tab("송신", id="sharing", visible=False):
                with gr.Row():
                    gr.Gallery(
                        label="생성된 이미지",
                        show_label=True,
                        elem_id="gallery"
                    )
                gr.Markdown("## 공동체와 함께 나누기")
                complete_button = gr.Button("완료")

        with gr.Row(visible=True) as float_menu:
            gr.Button("🏠", scale=1)
            gr.Button("🎵", scale=1)
            gr.Button("🎤", scale=1)
            gr.Button("🖼️", scale=1)

        def analyze_voice(audio_file, state):
            """Classify the clip's audio and transcribe it with the Korean ASR model."""
            try:
                if audio_file is None:
                    return {"error": "No audio input provided"}, "", state

                # Both pipelines expect 16 kHz mono audio, so resample on load.
                y, sr = librosa.load(audio_file, sr=16000)

                # The audio-classification pipeline returns a list of
                # {"label", "score"} dicts sorted by score, highest first.
                emotions = emotion_analyzer({"raw": y, "sampling_rate": sr})
                primary_emotion = emotions[0]

                text_result = speech_recognizer({"raw": y, "sampling_rate": sr})

                state["voice_analysis"] = {
                    "emotion": primary_emotion['label'],
                    "probability": float(primary_emotion['score']),
                    "text": text_result['text']
                }

                return {
                    "emotion": primary_emotion['label'],
                    "emotion_probability": f"{primary_emotion['score']:.2f}",
                    "transcribed_text": text_result['text'],
                    "status": "Analysis complete"
                }, text_result['text'], state
            except Exception as e:
                return {"error": str(e), "status": "Error occurred"}, "", state
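
        # Both pipelines also accept a file path directly (audio decoding is then
        # handled internally, assuming ffmpeg is available), so the explicit
        # librosa step above is optional.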

        voice_input.change(
            fn=analyze_voice,
            inputs=[voice_input, state],
            outputs=[emotion_output, text_output, state]
        )

        # Stage transitions: spread the old state first so the new "stage" value
        # is not overwritten by the existing key.
        intro_next.click(
            fn=lambda s: {**s, "stage": "cleansing"},
            inputs=[state],
            outputs=[state],
        )

        cleansing_next.click(
            fn=lambda s: {**s, "stage": "voice"},
            inputs=[state],
            outputs=[state],
        )

        voice_next.click(
            fn=lambda s: {**s, "stage": "sharing"},
            inputs=[state],
            outputs=[state],
        )
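
        # Note: the "청신"/"기원"/"송신" tabs are created with visible=False and nothing
        # above ever reveals them, so the stage value changes while the UI stays put.
        # A minimal sketch of one way to surface them (assumes Gradio 4.x, where a
        # handler can update Tab/Tabs components by returning new gr.Tab(...) /
        # gr.Tabs(...) instances, and that the target Tab is bound to a name such
        # as `cleansing_tab`):
        #
        #     intro_next.click(
        #         fn=lambda s: ({**s, "stage": "cleansing"},
        #                       gr.Tab(visible=True),
        #                       gr.Tabs(selected="cleansing")),
        #         inputs=[state],
        #         outputs=[state, cleansing_tab, tabs],
        #     )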

    return app


if __name__ == "__main__":
    app = create_interface()
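    # Since the handlers run model inference, enabling Gradio's request queue
    # first, e.g. app.queue().launch(), can help under concurrent use (optional).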
    app.launch()