import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
import json
# Initialize AI models
emotion_analyzer = pipeline(
    "audio-classification",
    model="MIT/ast-finetuned-speech-commands-v2"
)
speech_recognizer = pipeline(
    "automatic-speech-recognition",
    model="kresnik/wav2vec2-large-xlsr-korean"
)
# Module-level defaults; per-session values are tracked via gr.State below
current_stage = "intro"
session_data = {}


def create_interface():
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        # State management
        state = gr.State(value={"stage": "intro", "session_data": {}})
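        # gr.State keeps one copy of this dict per browser session, so
        # concurrent visitors don't overwrite each other's progress.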

        # Header
        gr.Markdown("# 디지털 굿판")

        # Navigation tabs
        with gr.Tabs() as tabs:
            # Intro/Worldview Stage
            with gr.Tab("입장", id="intro"):
                gr.Markdown("""
                # 디지털 굿판에 오신 것을 환영합니다
                온천천의 디지털 치유 공간으로 들어가보세요.
                """)
                intro_next = gr.Button("여정 시작하기")
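
            # The remaining stages start hidden (visible=False) and are
            # revealed by the navigation handlers wired up below.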

            # 청신 Stage (Sound Purification)
            with gr.Tab("청신", id="cleansing", visible=False) as cleansing_tab:
                with gr.Row():
                    audio_player = gr.Audio(
                        value="path_to_default_sound.mp3",  # default sound file
                        type="filepath",
                        label="온천천의 소리"
                    )
                    location_info = gr.Textbox(
                        label="현재 위치",
                        value="온천장역",
                        interactive=False
                    )
                cleansing_next = gr.Button("다음 단계로")

            # 기원 Stage (Voice Analysis)
            with gr.Tab("기원", id="voice", visible=False) as voice_tab:
                with gr.Row():
                    # Voice input component; type="filepath" hands the handler
                    # a temp-file path that librosa can load directly
                    voice_input = gr.Audio(
                        label="목소리로 전하기",
                        sources=["microphone", "upload"],
                        type="filepath"
                    )
                    # Analysis results
                    with gr.Column():
                        emotion_output = gr.JSON(
                            label="감정 분석 결과",
                            visible=True
                        )
                        text_output = gr.Textbox(
                            label="음성 텍스트",
                            visible=True
                        )
                voice_next = gr.Button("다음 단계로")

            # 송신 Stage (Sharing)
            with gr.Tab("송신", id="sharing", visible=False) as sharing_tab:
                with gr.Row():
                    gr.Gallery(
                        label="생성된 이미지",
                        show_label=True,
                        elem_id="gallery"
                    )
                gr.Markdown("## 공동체와 함께 나누기")
                complete_button = gr.Button("완료")

        # Floating navigation menu
        with gr.Row(visible=True) as float_menu:
            gr.Button("🏠", scale=1)
            gr.Button("🎵", scale=1)
            gr.Button("🎤", scale=1)
            gr.Button("🖼️", scale=1)

        # Voice analysis function
        def analyze_voice(audio_file, state):
            """Run emotion classification and Korean ASR on the recording,
            returning (emotion JSON, transcribed text, updated state)."""
            try:
                if audio_file is None:
                    return {"error": "No audio input provided"}, "", state
                # Load audio at 16 kHz, the sampling rate both models expect
                y, sr = librosa.load(audio_file, sr=16000)
                # Emotion analysis
                emotions = emotion_analyzer(y)
                primary_emotion = emotions[0]
                # Speech to text
                text_result = speech_recognizer(y)
                # Update state
                state["voice_analysis"] = {
                    "emotion": primary_emotion["label"],
                    "probability": float(primary_emotion["score"]),
                    "text": text_result["text"]
                }
                return {
                    "emotion": primary_emotion["label"],
                    "emotion_probability": f"{primary_emotion['score']:.2f}",
                    "status": "Analysis complete"
                }, text_result["text"], state
            except Exception as e:
                return {"error": str(e), "status": "Error occurred"}, "", state

        # Event handlers
        voice_input.change(
            fn=analyze_voice,
            inputs=[voice_input, state],
            outputs=[emotion_output, text_output, state]
        )

        # Stage navigation: record the new stage in session state (merging with
        # {**s, "stage": ...} so the new value wins), then reveal and select the
        # corresponding tab, since later tabs are created with visible=False
        def go_to_stage(stage_id):
            def handler(s):
                return (
                    {**s, "stage": stage_id},
                    gr.update(visible=True),
                    gr.update(selected=stage_id),
                )
            return handler

        intro_next.click(
            fn=go_to_stage("cleansing"),
            inputs=[state],
            outputs=[state, cleansing_tab, tabs],
        )
        cleansing_next.click(
            fn=go_to_stage("voice"),
            inputs=[state],
            outputs=[state, voice_tab, tabs],
        )
        voice_next.click(
            fn=go_to_stage("sharing"),
            inputs=[state],
            outputs=[state, sharing_tab, tabs],
        )

    return app


# Launch the application
if __name__ == "__main__":
    app = create_interface()
    app.launch()
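    # launch() serves on http://localhost:7860 by default; launch(share=True)
    # would additionally expose a temporary public URL.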