Spaces:
Runtime error
Runtime error
import os | |
import shutil | |
import sys | |
import numpy as np | |
# importing libraries | |
import requests | |
import glob | |
import soundfile as sf | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
import pydub | |
AudioSegment.converter = "/usr/bin/ffmpeg" | |
import io | |
from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface | |
from CaesarMobileTranslate.caesarmobiletranslate import CaesarMobileTranslate | |
from CaesarMobileTTS.caesarmobiletts import CaesarMobileTTS | |
from tqdm import tqdm | |
def cosine_similarity(doc1,doc2): | |
import spacy | |
nlp = spacy.load("en_core_web_sm") | |
doc1 = nlp(doc1) | |
doc2 = nlp(doc2) | |
similarity = np.dot(doc1.vector, doc2.vector) / (np.linalg.norm(doc1.vector) * np.linalg.norm(doc2.vector)) | |
return similarity | |
# create a speech recognition object | |
r = sr.Recognizer() | |
class CaesarMobileTranscribe(CaesarFolderInterface): | |
def __init__(self) -> None: | |
super().__init__() | |
self.caesartrans = CaesarMobileTranslate() | |
self.caesartts = CaesarMobileTTS() | |
def create_all_dirs(self): | |
if self.audio_input_folder not in os.listdir(): | |
os.mkdir(self.audio_input_folder) | |
if self.notes_folder not in os.listdir(): | |
os.mkdir(self.notes_folder) | |
if self.audio_output_folder not in os.listdir(): | |
os.mkdir(self.audio_output_folder) | |
def store_audio(self,argfilename,contents,fileformat): | |
try: | |
recording = AudioSegment.from_file(io.BytesIO(contents)) # , format=fileformat | |
recording.export(f'{self.audio_input_folder}/{argfilename}.wav', format='wav') | |
return True | |
except Exception as ex: | |
print(type(ex),ex) | |
return False | |
def send_revisionbank(self,sentences,txtfilename): | |
boole = True | |
while boole == True: | |
sendrevisionbank = input("Send to RevisionBank: (y) or (n)").lower() | |
if sendrevisionbank == "y": | |
cardname = f'{txtfilename.split("/")[0]}/{txtfilename.split("/")[-1].replace(".txt","").capitalize()}' | |
json = {"revisioncardscheduler":{"sendtoemail":"[email protected]","revisionscheduleinterval":60,"revisioncards":[{"subject":f"A-Level {cardname}","revisioncardtitle":cardname,"revisioncard":sentences}]}} | |
loginjson = {"email":"[email protected]","password":"kya63amari"} | |
try: | |
print("Logging in...") | |
access_token = requests.post("https://revisionbank.onrender.com/loginapi",json=loginjson).json()["access_token"] | |
headers = {"Authorization": f"Bearer {access_token}"} | |
print("Logged in.") | |
except Exception as ex: | |
print("Login Failed.{}:{}".format(type(ex),ex)) | |
try: | |
print("Storing CaesarAI text...") | |
response = requests.post("https://revisionbank.onrender.com/storerevisioncards",json=json,headers=headers).json() | |
print("CaesarAI Stored.") | |
except Exception as ex: | |
print("CaesarAI Text not stored.".format(type(ex),ex)) | |
boole = False | |
elif sendrevisionbank == "n": | |
boole = False | |
else: | |
boole = True | |
def check_if_wav(self,argfilename): | |
folder = self.audio_input_folder | |
res = "" | |
for i in os.listdir(folder): | |
if argfilename in i : | |
res += i | |
if "wav" in res: | |
return True | |
else: | |
return False | |
def mp3_to_wav(self,src,dst): | |
folder = self.audio_input_folder | |
src = f"{folder}/{src}" | |
dst = f"{folder}/{dst}" | |
sound = AudioSegment.from_mp3(src) | |
sound.export(dst, format="wav") | |
os.remove(src) | |
def check_file_exists(self,argfilename): | |
folder = self.notes_folder | |
if folder in os.listdir(): | |
if f"{argfilename}.txt" in os.listdir(folder): | |
return True | |
else: | |
return False | |
else: | |
return True | |
def slice_sections(self,argfilename,largewav="large"): | |
filename = "{}/{}.wav".format(self.audio_input_folder,argfilename) | |
sound = AudioSegment.from_wav(filename) | |
if self.notes_folder not in os.listdir(): | |
os.mkdir(self.notes_folder) | |
if largewav == "small": | |
sentences ="" | |
txtfilename = "{}/{}.txt".format(self.notes_folder,argfilename) | |
with sr.AudioFile(filename) as source: | |
# listen for the data (load audio to memory) | |
audio_data = r.record(source) | |
# recognize (convert from speech to text) | |
text = r.recognize_google(audio_data) | |
#print(text) | |
with open(txtfilename,"w+") as f: | |
f.write(text) | |
with open(txtfilename,"r") as f: | |
text = f.read() | |
textlist = text.split("period") | |
#print(textlist) | |
with open(txtfilename,"w+") as f: | |
for t in textlist: | |
sentence = f"{t.rstrip().lstrip()}.\n".capitalize() | |
#print(sentence) | |
sentences += f"{sentence}\n" | |
f.write(sentence) | |
#print(textlist[0]) | |
#self.send_revisionbank(sentences,txtfilename) | |
print(sentences) | |
if largewav == "large": | |
sentences = "" | |
duration = sound.duration_seconds //60 | |
# 7 seconds - 3 minutes | |
#print(duration) | |
minute_intervals = 0.5# 0.15 or 0.50 # TODO Try 1,2 and 3 and see which is the most optimized by seeing themost words/letters collected. | |
percentages = [i * (minute_intervals/duration) for i in range(0,int(duration//minute_intervals))] | |
#print(percentages) | |
#percentages = [i/20 for i in range(0,20)]# 0.8 | |
# TODO Maximum audio time is 3.8 minutes, using percentage may be inconsistent if the audio duration increases. | |
slicedsections = [ sound[round(percentages[i] * len(sound)):round(percentages[i+1] * len(sound))] for i in range(len(percentages)-1) ] | |
return slicedsections | |
def run_api(self,argfilename,language_src,dest,slicedsections,new_sound,verbose=0): | |
filename = "{}/{}.wav".format(self.audio_input_folder,argfilename) | |
txtfilename = "{}/{}.txt".format(self.notes_folder,argfilename) | |
folder_name = "audio-chunks" | |
print("Starting...") | |
for i, audio_chunk in enumerate(tqdm(slicedsections), start=1): | |
# create a drectory to store the audio chunks | |
if not os.path.isdir(folder_name): | |
os.mkdir(folder_name) | |
print("Translating chunk{}.wav...".format(i)) | |
chunk_filename = os.path.join(folder_name, "chunk{}.wav".format(i)) | |
audio_chunk.export(chunk_filename, format="wav") | |
with sr.AudioFile(chunk_filename) as source: | |
audio_listened = r.record(source) | |
# try converting it to text | |
try: | |
text = r.recognize_google(audio_listened,language=language_src) | |
except sr.UnknownValueError as e: | |
print("Error:", str(e)) | |
else: | |
text = "{}. ".format(text.capitalize()) | |
try: | |
with open(txtfilename,"a+",encoding="utf-8") as f: | |
f.write("{}\n".format(text)) | |
with open(txtfilename,"r",encoding="utf-8") as f: | |
caesarnotesduplicate = f.readlines() | |
except UnicodeEncodeError as uex: | |
pass | |
try: | |
sim = cosine_similarity(caesarnotesduplicate[-1],caesarnotesduplicate[-2]) | |
if sim > 0.95: | |
caesarnotesduplicate.remove(caesarnotesduplicate[-1]) | |
try: | |
with open(txtfilename,"w+",encoding="utf-8") as f: | |
for word in caesarnotesduplicate: | |
f.write(word) | |
except UnicodeEncodeError as uex: | |
pass | |
except IndexError as iex: | |
pass | |
try: | |
print(chunk_filename, ":", text) | |
translation,dest,origin,src = self.caesartrans.translate(text,dest) | |
print(translation) | |
self.caesartts.run_tts(argfilename,translation,dest) | |
ttsfilename = f"{self.audio_output_folder}/{argfilename}.mp3" | |
sound = AudioSegment.from_mp3(ttsfilename) | |
new_sound += sound | |
yield i,new_sound,src,text,translation | |
except UnicodeEncodeError as uex: | |
pass | |
try: | |
shutil.rmtree('audio-chunks') | |
except FileNotFoundError as fex: | |
pass | |
def clean_up_wav(self,argfilename): | |
try: | |
folder = self.audio_input_folder | |
os.remove(f"{folder}/{argfilename}.wav") | |
return True | |
except Exception as ex: | |
return False | |
def clean_up_txt(self,argfilename): | |
try: | |
folder = self.notes_folder | |
os.remove(f"{folder}/{argfilename}.txt") | |
return True | |
except Exception as ex: | |
return False | |
def load_transcription(self,argfilename): | |
with open(f"{self.notes_folder}/{argfilename}.txt") as f: | |
text = f.read() | |
return text | |
def load_audio(self,argfilename,fileformat,folder): | |
print(f"{folder}/{argfilename}.{fileformat}") | |
try: | |
with open(f"{folder}/{argfilename}.{fileformat}","rb") as f: | |
contents = f.read() | |
return contents | |
except FileNotFoundError as fex: | |
return False | |
if __name__ == "__main__": | |
caesarmbtr = CaesarMobileTranscribe() | |
argfilename = "DIALOGUE_de" | |
dest = "fr" | |
ttsfilename_new = f"{caesarmbtr.audio_output_folder}/{argfilename}_new.mp3" | |
new_sound = AudioSegment.empty() | |
sliced_sections = caesarmbtr.slice_sections(argfilename) | |
for i,new_sound in caesarmbtr.run_api(argfilename,dest,sliced_sections,new_sound): | |
print(f"{i}:") | |
new_sound.export(ttsfilename_new, format="mp3") | |