Spaces:

ireneminhee
/

speech-to-depression

Sleeping

App Files Files Community

ireneminhee committed on Dec 4, 2024

Commit

a36df50

verified ·

1 Parent(s): 5276b72

Upload 4 files

Browse files

Files changed (4) hide show

app.py +56 -0
model/config.json +26 -0
model/model.safetensors +3 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# -*- coding: utf-8 -*-
+"""app.py
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
+"""
+import gradio as gr
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import nemo.collections.asr as nemo_asr
+# STT 모델 로드
+stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="SungBeom/stt_kr_conformer_ctc_medium")
+# 우울증 판별 모델 로드
+model_path = "./model"
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
+# STT 변환 함수
+def speech_to_text(audio_file):
+    transcription = stt_model.transcribe(paths2audio_files=[audio_file])[0]
+    return transcription
+# 우울증 판별 함수
+def predict_depression(text):
+    inputs = tokenizer(text, return_tensors="pt")
+    outputs = model(**inputs)
+    probabilities = torch.sigmoid(outputs.logits)
+    depression_prob = probabilities[0, 1].item()
+    if depression_prob > 0.5:
+        return f"Depressed (Confidence: {depression_prob:.2%})"
+    else:
+        return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"
+# 음성 입력 -> 우울증 판별 함수
+def speech_to_depression(audio_file):
+    text = speech_to_text(audio_file)
+    result = predict_depression(text)
+    return {"Transcription": text, "Depression Result": result}
+# Gradio 인터페이스
+interface = gr.Interface(
+    fn=speech_to_depression,
+    inputs=gr.Audio(source="microphone", type="filepath", label="Speak here"),
+    outputs=[
+        gr.Textbox(label="Transcription"),
+        gr.Textbox(label="Depression Detection Result"),
+    ],
+    title="Speech-to-Depression Detection",
+    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
+)
+interface.launch()

model/config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "_name_or_path": "klue/bert-base",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 32000
+}

model/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:068f298b04d4128810f80ad8033351e31ec22cdec0c6865014bf2f9a68ff165c
+size 442499064

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+transformers
+torch
+nemo_toolkit[all]