ireneminhee committed on
Commit
a36df50
·
verified ·
1 Parent(s): 5276b72

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +56 -0
  2. model/config.json +26 -0
  3. model/model.safetensors +3 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.py
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
8
+ """
9
+ import gradio as gr
10
+ import torch
11
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
12
+ import nemo.collections.asr as nemo_asr
13
+
14
+ # Load the speech-to-text (STT) model
15
+ stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="SungBeom/stt_kr_conformer_ctc_medium")
16
+
17
+ # Load the depression classification model
18
+ model_path = "./model"  # local directory holding config.json and model.safetensors
19
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
20
+ tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")  # same checkpoint name as "_name_or_path" in model/config.json
21
+
22
# Speech-to-text (STT) conversion
def speech_to_text(audio_file):
    """Transcribe a single audio file with the module-level STT model.

    Args:
        audio_file: Path to the audio file to transcribe, or ``None`` / empty
            when no audio was provided.

    Returns:
        The transcription text. An empty string is returned when no audio
        was provided or the model produced no result.
    """
    if not audio_file:
        # The UI can submit without a recording; there is nothing to transcribe.
        return ""

    # Pass the file list positionally: the keyword for this argument differs
    # between NeMo releases (``paths2audio_files`` vs. ``audio``), while the
    # first positional parameter is the audio list in both.
    results = stt_model.transcribe([audio_file])
    if not results:
        return ""

    first = results[0]
    # Some NeMo releases return hypothesis objects exposing ``.text`` rather
    # than plain strings; accept either form.
    return getattr(first, "text", first)
26
+
27
# Depression classification
def predict_depression(text):
    """Classify ``text`` and report the predicted label with its confidence.

    Args:
        text: The text to classify.

    Returns:
        ``"Depressed (Confidence: P)"`` when the probability of label index 1
        exceeds 0.5, otherwise ``"Not Depressed (Confidence: 1 - P)"``, with
        the value formatted as a percentage.
    """
    # Truncate to the model's position limit so long transcriptions do not
    # fail inside the embedding layer.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    # The model config declares single-label classification, so the class
    # probabilities are a softmax over the label axis. Independent sigmoids
    # do not sum to 1, which would make ``1 - depression_prob`` meaningless.
    probabilities = torch.softmax(logits, dim=-1)

    # NOTE(review): label index 1 is assumed to be the "depressed" class, as
    # in the original code -- confirm against the training label mapping.
    depression_prob = probabilities[0, 1].item()

    if depression_prob > 0.5:
        return f"Depressed (Confidence: {depression_prob:.2%})"
    return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"
37
+
38
# Speech input -> depression classification
def speech_to_depression(audio_file):
    """Transcribe an audio file and classify the resulting text.

    Args:
        audio_file: Path to the recorded audio file.

    Returns:
        A ``(transcription, result)`` tuple. The Gradio interface in this
        file declares two output components (transcription, detection
        result), and Gradio expects one returned value per output component
        in order; a dict keyed by plain strings is not accepted.
    """
    text = speech_to_text(audio_file)
    result = predict_depression(text)
    return text, result
43
+
44
# Gradio interface: microphone audio in, transcription and classification out
interface = gr.Interface(
    fn=speech_to_depression,
    # Gradio 4+ takes a list of allowed inputs via ``sources``; the older
    # singular ``source`` keyword is no longer accepted. The dependency is
    # unpinned in requirements.txt, so target the current API.
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak here"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Depression Detection Result"),
    ],
    title="Speech-to-Depression Detection",
    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
)

interface.launch()
model/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "klue/bert-base",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.46.2",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 32000
26
+ }
model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068f298b04d4128810f80ad8033351e31ec22cdec0c6865014bf2f9a68ff165c
3
+ size 442499064
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ nemo_toolkit[all]
5
+