import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
import json

# Initialize AI models
emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
speech_recognizer = pipeline("automatic-speech-recognition",
                             model="kresnik/wav2vec2-large-xlsr-korean")

# Global state management
current_stage = "intro"
session_data = {}
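
# Note: both checkpoints use 16 kHz feature extractors, so recordings are
# resampled to 16 kHz in analyze_voice() below before inference.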


def create_interface():
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        # State management
        state = gr.State(value={"stage": "intro", "session_data": {}})

        # Header
        gr.Markdown("# 디지털 굿판")

        # Navigation tabs
        with gr.Tabs() as tabs:
            # Intro / worldview stage
            with gr.Tab("입장", id="intro"):
                gr.Markdown("""
# 디지털 굿판에 오신 것을 환영합니다
온천천의 디지털 치유 공간으로 들어가보세요.
""")
                intro_next = gr.Button("여정 시작하기")

            # 청신 Stage (Sound Purification)
            with gr.Tab("청신", id="cleansing", visible=False):
                with gr.Row():
                    audio_player = gr.Audio(
                        value="path_to_default_sound.mp3",  # placeholder: point this at the bundled default sound file
                        type="filepath",
                        label="온천천의 소리"
                    )
                    location_info = gr.Textbox(
                        label="현재 위치",
                        value="온천장역",
                        interactive=False
                    )
                cleansing_next = gr.Button("다음 단계로")

            # 기원 Stage (Voice Analysis)
            with gr.Tab("기원", id="voice", visible=False):
                with gr.Row():
                    # Voice input component
                    voice_input = gr.Audio(
                        label="목소리로 전하기",
                        sources=["microphone", "upload"],
                        type="filepath"
                    )
                    # Analysis results
                    with gr.Column():
                        emotion_output = gr.JSON(
                            label="감정 분석 결과",
                            visible=True
                        )
                        text_output = gr.Textbox(
                            label="음성 텍스트",
                            visible=True
                        )
                voice_next = gr.Button("다음 단계로")

            # 송신 Stage (Sharing)
            with gr.Tab("송신", id="sharing", visible=False):
                with gr.Row():
                    gr.Gallery(
                        label="생성된 이미지",
                        show_label=True,
                        elem_id="gallery"
                    )
                gr.Markdown("## 공동체와 함께 나누기")
                complete_button = gr.Button("완료")

        # Floating navigation menu
        with gr.Row(visible=True) as float_menu:
            gr.Button("🏠", scale=1)
            gr.Button("🎵", scale=1)
            gr.Button("🎤", scale=1)
            gr.Button("🖼️", scale=1)
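
        # The floating menu buttons above are purely visual at this point;
        # no click handlers are attached to them yet.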

        # Voice analysis function
        def analyze_voice(audio_file, state):
            try:
                if audio_file is None:
                    return {"error": "No audio input provided"}, state

                # Load audio as 16 kHz mono, the rate the pipelines' feature extractors expect
                y, sr = librosa.load(audio_file, sr=16000)

                # Emotion analysis
                emotions = emotion_analyzer({"raw": y, "sampling_rate": sr})
                primary_emotion = emotions[0]

                # Speech to text
                text_result = speech_recognizer({"raw": y, "sampling_rate": sr})

                # Update state
                state["voice_analysis"] = {
                    "emotion": primary_emotion["label"],
                    "probability": float(primary_emotion["score"]),
                    "text": text_result["text"]
                }

                return {
                    "emotion": primary_emotion["label"],
                    "emotion_probability": f"{primary_emotion['score']:.2f}",
                    "transcribed_text": text_result["text"],
                    "status": "Analysis complete"
                }, state
            except Exception as e:
                return {"error": str(e), "status": "Error occurred"}, state

        # Event handlers
        voice_input.change(
            fn=analyze_voice,
            inputs=[voice_input, state],
            outputs=[emotion_output, state]
        )
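
        # Note: .change fires whenever the audio value updates (a recording
        # finishes or a file is uploaded), so analysis runs automatically.
        # text_output is defined above but not listed in outputs, so the
        # transcription currently only appears inside the JSON panel.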

        # Stage navigation: record the new stage in state
        # (the later key wins in a dict merge, so "stage" must come after **s)
        intro_next.click(
            fn=lambda s: {**s, "stage": "cleansing"},
            inputs=[state],
            outputs=[state],
        )
        cleansing_next.click(
            fn=lambda s: {**s, "stage": "voice"},
            inputs=[state],
            outputs=[state],
        )
        voice_next.click(
            fn=lambda s: {**s, "stage": "sharing"},
            inputs=[state],
            outputs=[state],
        )
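
        # These handlers only update the stage stored in state; the hidden tabs
        # stay hidden. A minimal sketch of how the next tab could also be revealed
        # (assumption, not part of the original code: the gr.Tab objects would have
        # to be captured, e.g. `with gr.Tab("청신", id="cleansing", visible=False) as cleansing_tab:`):
        #
        #     intro_next.click(
        #         fn=lambda s: ({**s, "stage": "cleansing"}, gr.update(visible=True)),
        #         inputs=[state],
        #         outputs=[state, cleansing_tab],
        #     )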

    return app


# Launch the application
if __name__ == "__main__":
    app = create_interface()
    app.launch()
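
# To run locally (assuming the dependencies this file uses are installed:
# gradio, transformers, torch, and librosa):
#   python app.py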