ISOM5240_Group25_testing

Sleeping

App Files Files Community

RexChan committed on Mar 22, 2024

Commit

f4f38de

verified ·

1 Parent(s): 3144c61

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
README.md +6 -4
app.py +95 -0
requirements.txt +10 -0
test1.mp3 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+test1.mp3 filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,12 +1,14 @@
 ---
-title: ISOM5240 Group25 2
-emoji: 📉
-colorFrom: red
-colorTo: gray
 sdk: streamlit
 sdk_version: 1.32.2
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: ISOM5240 Group25
+emoji: 👀
+colorFrom: blue
+colorTo: yellow
 sdk: streamlit
 sdk_version: 1.32.2
 app_file: app.py
 pinned: false
+python_version: "3.8"
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import streamlit as st
+from transformers import WhisperForConditionalGeneration, WhisperProcessor
+from transformers import pipeline
+import librosa
+import torch
+from spleeter.separator import Separator
+from pydub import AudioSegment
+from IPython.display import Audio
+import os
+import accelerate
+# streamlit setup
+# Set the page title and render the heading of the app.
+st.set_page_config(page_title="Sentiment Analysis on Your Cantonese Song",)
+st.header("Cantonese Song Sentiment Analyzer")
+# load song
+# input_file: path of the song that audio_preprocess() hands to Spleeter (empty by default).
+# output_file: directory that audio_preprocess() passes to Spleeter as the output location.
+# NOTE(review): "/content/" looks like a Google Colab path - confirm it exists and is
+# writable in the environment where this app is deployed.
+input_file = ""
+output_file = "/content/"
+# preprocess and crop audio file
+def audio_preprocess():
+    # separate music and vocal
+    separator = Separator('spleeter:2stems')
+    separator.separate_to_file(input_file, output_file)
+    # Crop the audio
+    start_time = 60000  # e.g. 30 seconds, 30000
+    end_time = 110000  # e.g. 40 seconds, 40000
+    audio = AudioSegment.from_file('/content/test1/vocals.wav')
+    cropped_audio = audio[start_time:end_time]
+    cropped_audio.export('/content/cropped_vocals.wav', format='wav') # save vocal audio file
+# ASR transcription
+def asr_model():
+    # load audio file
+    y, sr = librosa.load('cropped_vocals.wav', sr=16000)
+    # ASR model
+    MODEL_NAME = "RexChan/ISOM5240-whisper-small-zhhk_1"
+    processor = WhisperProcessor.from_pretrained(MODEL_NAME)
+    model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
+    model.config.forced_decoder_ids = None
+    model.config.suppress_tokens = []
+    model.config.use_cache = False
+    processed_in = processor(y, sampling_rate=sr, return_tensors="pt")
+    gout = model.generate(
+        input_features=processed_in.input_features,
+        output_scores=True, return_dict_in_generate=True
+    )
+    transcription = processor.batch_decode(gout.sequences, skip_special_tokens=True)[0]
+    # print result
+    print(f"Song lyrics = {transcription}")
+    return transcription
+# sentiment analysis
+def senti_model(transcription):
+    pipe = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
+    final_result = pipe(transcription)
+    print(f"Sentiment Analysis shows that this song is {final_result[0]['label']}. Confident level of this analysis is {final_result[0]['score']*100:.1f}%.")
+    return final_result
+# main
+def main():
+    input_file = st.file_uploader("upload a song in mp3 format", type="mp3") # upload song
+    if input_file is not None:
+      st.write("File uploaded successfully!")
+    else:
+      st.write("No file uploaded.")
+    audio_preprocess()
+    transcription = asr_model()
+    final_result = senti_model(transcription)
+    if st.button("Play Audio"):
+        st.audio(audio_data['audio'],
+                    format="audio/wav",
+                    start_time=0,
+                    sample_rate = audio_data['sampling_rate'])
+if __name__ == '__main__':
+    # Render the "Run Analysis" button, then start the app flow.
+    # NOTE(review): the button state stored in `clicked` is never read, so main()
+    # runs on every script run regardless of the button - confirm whether the
+    # analysis was meant to be gated on it.
+    clicked = st.button("Run Analysis")
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+transformers
+librosa
+spleeter
+pydub
+torch
+accelerate
+tensorflow
+ipython
+protobuf==3.20.*
+ffmpeg-python

test1.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34e9acc0397bc9b30351ca37ef126a1de61c0a933b82ac1cadde06cee965a569
+size 3359169