RexChan commited on
Commit
f4f38de
1 Parent(s): 3144c61

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +6 -4
  3. app.py +95 -0
  4. requirements.txt +10 -0
  5. test1.mp3 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ test1.mp3 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: ISOM5240 Group25 2
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.32.2
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ISOM5240 Group25
3
+ emoji: 👀
4
+ colorFrom: blue
5
+ colorTo: yellow
6
  sdk: streamlit
7
  sdk_version: 1.32.2
8
  app_file: app.py
9
  pinned: false
10
+ python_version: "3.8"
11
+
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
3
+ from transformers import pipeline
4
+ import librosa
5
+ import torch
6
+ from spleeter.separator import Separator
7
+ from pydub import AudioSegment
8
+ from IPython.display import Audio
9
+ import os
10
+ import accelerate
11
+
12
# --- Streamlit page setup ---------------------------------------------------
# set_page_config must run before any other Streamlit call.
st.set_page_config(page_title="Sentiment Analysis on Your Cantonese Song")
st.header("Cantonese Song Sentiment Analyzer")


# --- Module-level paths used by the pipeline --------------------------------
# input_file: path of the song to analyse (filled in by main()'s uploader).
# output_file: directory spleeter writes its stems into.
# NOTE(review): "/content/" is a Colab-specific path — confirm it exists in
# the deployed Space before relying on it.
input_file, output_file = "", "/content/"

22
# preprocess and crop audio file
def audio_preprocess():
    """Separate the vocals from ``input_file`` and save a cropped excerpt.

    Runs Spleeter's 2-stem model (vocals vs. accompaniment), then crops the
    vocal track to the 60s–110s window and writes it to 'cropped_vocals.wav'
    in the working directory, where asr_model() expects to find it.
    """
    # separate music and vocal; spleeter writes <output_file>/<stem>/vocals.wav
    separator = Separator('spleeter:2stems')
    separator.separate_to_file(input_file, output_file)

    # derive the stem directory from the input name instead of the original
    # hard-coded 'test1', so any uploaded song works
    stem = os.path.splitext(os.path.basename(input_file))[0]
    vocals_path = os.path.join(output_file, stem, 'vocals.wav')

    # Crop the audio (milliseconds)
    start_time = 60000  # 60 s
    end_time = 110000   # 110 s

    audio = AudioSegment.from_file(vocals_path)
    cropped_audio = audio[start_time:end_time]
    # export to the working directory — the original wrote to
    # '/content/cropped_vocals.wav' while asr_model() reads the relative
    # 'cropped_vocals.wav', so the pipeline could never find the file
    cropped_audio.export('cropped_vocals.wav', format='wav')

37
# ASR transcription
def asr_model():
    """Transcribe 'cropped_vocals.wav' to Cantonese lyrics with Whisper.

    Returns:
        str: the decoded transcription of the cropped vocal track.
    """
    # load audio file at 16 kHz — the sample rate Whisper was trained on
    y, sr = librosa.load('cropped_vocals.wav', sr=16000)

    # ASR model (fine-tuned whisper-small for Hong Kong Cantonese)
    MODEL_NAME = "RexChan/ISOM5240-whisper-small-zhhk_1"
    processor = WhisperProcessor.from_pretrained(MODEL_NAME)
    model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)

    # clear forced/suppressed tokens so the fine-tuned model decodes freely
    model.config.forced_decoder_ids = None
    model.config.suppress_tokens = []
    model.config.use_cache = False

    processed_in = processor(y, sampling_rate=sr, return_tensors="pt")
    gout = model.generate(
        input_features=processed_in.input_features,
        output_scores=True, return_dict_in_generate=True
    )
    transcription = processor.batch_decode(gout.sequences, skip_special_tokens=True)[0]

    # show the result in the app UI — the original print() only reached the
    # server log, so users never saw the lyrics
    st.write(f"Song lyrics = {transcription}")

    return transcription

64
# sentiment analysis
def senti_model(transcription):
    """Classify the sentiment of the transcribed lyrics.

    Args:
        transcription: lyrics text returned by asr_model().

    Returns:
        The pipeline output: a list with one dict holding 'label' and 'score'.
    """
    pipe = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
    final_result = pipe(transcription)
    # report in the app UI — the original print() was invisible to the user
    st.write(
        f"Sentiment Analysis shows that this song is {final_result[0]['label']}. "
        f"Confident level of this analysis is {final_result[0]['score']*100:.1f}%."
    )

    return final_result

74
# main
def main():
    """Drive the app: upload a song, separate/crop vocals, transcribe, classify.

    Fixes over the original:
    - returns early when nothing is uploaded (the original fell through and
      ran the whole pipeline on no input);
    - persists the upload to disk and updates the module-level ``input_file``
      that audio_preprocess() reads (the original's local assignment shadowed
      the global, so spleeter never saw the uploaded song);
    - plays the uploaded audio (the original referenced an undefined
      ``audio_data`` and raised NameError).
    """
    global input_file  # audio_preprocess() reads this module-level path

    uploaded = st.file_uploader("upload a song in mp3 format", type="mp3")  # upload song
    if uploaded is not None:
        st.write("File uploaded successfully!")
    else:
        st.write("No file uploaded.")
        return  # nothing to analyse

    # spleeter needs a real file path, not Streamlit's in-memory UploadedFile
    song_bytes = uploaded.getvalue()
    input_file = "uploaded_song.mp3"
    with open(input_file, "wb") as f:
        f.write(song_bytes)

    audio_preprocess()
    transcription = asr_model()
    final_result = senti_model(transcription)

    if st.button("Play Audio"):
        st.audio(song_bytes,
                 format="audio/mp3",
                 start_time=0)

93
if __name__ == '__main__':
    # Streamlit re-runs the whole script on every interaction, so main()
    # drives the UI directly. The original also created a "Run Analysis"
    # button whose value (`clicked`) was never read — dead UI, removed.
    main()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ librosa
3
+ spleeter
4
+ pydub
5
+ torch
6
+ accelerate
7
+ tensorflow
8
+ ipython
9
+ protobuf==3.20.*
10
+ ffmpeg-python
test1.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34e9acc0397bc9b30351ca37ef126a1de61c0a933b82ac1cadde06cee965a569
3
+ size 3359169