Spaces:

palondomus
/

CaesarAITranscribeTL

Runtime error

App Files Files Community

palondomus committed on Sep 8, 2023

Commit

5ffb1d1

0 Parent(s):

Mostly works trying for huggingface

Browse files

Files changed (20) hide show

.gitattributes +35 -0
CaesarFolderInterface/__pycache__/caesarfolderinterface.cpython-39.pyc +0 -0
CaesarFolderInterface/caesarfolderinterface.py +26 -0
CaesarMobileTTS/__pycache__/caesarmobiletts.cpython-39.pyc +0 -0
CaesarMobileTTS/caesarmobiletts.py +48 -0
CaesarMobileTranscribe/__pycache__/caesartranscribe.cpython-39.pyc +0 -0
CaesarMobileTranscribe/caesartranscribe.py +270 -0
CaesarMobileTranslate/__pycache__/caesarmobiletranslate.cpython-39.pyc +0 -0
CaesarMobileTranslate/caesarmobiletranslate.py +17 -0
CaesarSQLDB +1 -0
Dockerfile +38 -0
README.md +8 -0
__pycache__/main.cpython-39.pyc +0 -0
caesarmobile.py +28 -0
fly.toml +17 -0
main.py +158 -0
main_db.py +176 -0
python_version +1 -0
requirements.txt +58 -0
requirements_win.txt +90 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

CaesarFolderInterface/__pycache__/caesarfolderinterface.cpython-39.pyc ADDED Viewed

Binary file (1.41 kB). View file

CaesarFolderInterface/caesarfolderinterface.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from pydub import AudioSegment
+import io
+import os
+class CaesarFolderInterface:
+    def __init__(self) -> None:
+        self.audio_input_folder = "CaesarAudioWAVs"
+        self.notes_folder = "CaesarNotes"
+        self.audio_output_folder = "CaesarAudioTranslations"
+    def clean_all(self):
+        try:
+            for i in os.listdir(self.audio_input_folder):
+                os.remove(f"{self.audio_input_folder}/{i}")
+            for i in os.listdir(self.notes_folder):
+                os.remove(f"{self.notes_folder}/{i}")
+            for i in os.listdir(self.audio_output_folder):
+                os.remove(f"{self.audio_output_folder}/{i}")
+        except Exception as ex:
+            return False
+    def store_audio(self,argfilename,contents):
+        try:
+            recording = AudioSegment.from_file(io.BytesIO(contents), format="mp3")
+            recording.export(f'{self.audio_output_folder}/{argfilename}.mp3', format='mp3')
+            return True
+        except Exception as ex:
+            return False

CaesarMobileTTS/__pycache__/caesarmobiletts.cpython-39.pyc ADDED Viewed

Binary file (1.86 kB). View file

CaesarMobileTTS/caesarmobiletts.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from gtts import gTTS
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+import os
+class CaesarMobileTTS(CaesarFolderInterface):
+    def __init__(self) -> None:
+        super().__init__()
+    def load_transcription(self,argfilename):
+        with open(f"{self.audio_output_folder}/{argfilename}.mp3","rb") as f:
+            contents = f.read()
+        return contents
+    def check_file_exists(self,argfilename):
+        folder =  self.audio_output_folder
+        if folder in os.listdir():
+            if f"{argfilename}.mp3" in os.listdir(folder):
+                return True
+            else:
+                return False
+        else:
+            return True
+    def clean_up_tts(self,argfilename):
+        try:
+            folder = self.audio_output_folder
+            os.remove(f"{folder}/{argfilename}.mp3")
+            return True
+        except Exception as ex:
+            return False
+    def run_tts(self,argfilename,text,language):
+        try:
+            myobj = gTTS(text=text, lang=language, slow=False)
+            # Saving the converted audio in a mp3 file named
+            # welcome
+            if self.audio_output_folder not in os.listdir():
+                os.mkdir(self.audio_output_folder)
+            myobj.save(f"{self.audio_output_folder}/{argfilename}.mp3")
+            # Playing the converted file
+            #os.system("mpg321 welcome.mp3")
+            worked = True
+            error = None
+        except Exception as ex:
+            worked = False
+            error = f"{type(ex)} - {ex}"
+        return worked,error

CaesarMobileTranscribe/__pycache__/caesartranscribe.cpython-39.pyc ADDED Viewed

Binary file (8.81 kB). View file

CaesarMobileTranscribe/caesartranscribe.py ADDED Viewed

	@@ -0,0 +1,270 @@

+import os
+import shutil
+import sys
+import numpy as np
+# importing libraries
+import requests
+import glob
+import soundfile as sf
+import speech_recognition as sr
+from pydub import AudioSegment
+import pydub
+AudioSegment.converter = "/usr/bin/ffmpeg"
+import io
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+from CaesarMobileTranslate.caesarmobiletranslate import CaesarMobileTranslate
+from CaesarMobileTTS.caesarmobiletts import CaesarMobileTTS
+from tqdm import tqdm
+def cosine_similarity(doc1,doc2):
+    import spacy
+    nlp = spacy.load("en_core_web_sm")
+    doc1 = nlp(doc1)
+    doc2 = nlp(doc2)
+    similarity = np.dot(doc1.vector, doc2.vector) / (np.linalg.norm(doc1.vector) * np.linalg.norm(doc2.vector))
+    return similarity
+# create a speech recognition object
+r = sr.Recognizer()
+class CaesarMobileTranscribe(CaesarFolderInterface):
+    def __init__(self) -> None:
+        super().__init__()
+        self.caesartrans = CaesarMobileTranslate()
+        self.caesartts = CaesarMobileTTS()
+    def create_all_dirs(self):
+        if self.audio_input_folder not in os.listdir():
+            os.mkdir(self.audio_input_folder)
+        if self.notes_folder not in os.listdir():
+            os.mkdir(self.notes_folder)
+        if self.audio_output_folder not in os.listdir():
+            os.mkdir(self.audio_output_folder)
+    def store_audio(self,argfilename,contents,fileformat):
+        try:
+            recording = AudioSegment.from_file(io.BytesIO(contents)) # , format=fileformat
+            recording.export(f'{self.audio_input_folder}/{argfilename}.wav', format='wav')
+            return True
+        except Exception as ex:
+            print(type(ex),ex)
+            return False
+    def send_revisionbank(self,sentences,txtfilename):
+        boole = True
+        while boole == True:
+            sendrevisionbank = input("Send to RevisionBank: (y) or (n)").lower()
+            if sendrevisionbank == "y":
+                cardname = f'{txtfilename.split("/")[0]}/{txtfilename.split("/")[-1].replace(".txt","").capitalize()}'
+                json = {"revisioncardscheduler":{"sendtoemail":"[email protected]","revisionscheduleinterval":60,"revisioncards":[{"subject":f"A-Level {cardname}","revisioncardtitle":cardname,"revisioncard":sentences}]}}
+                loginjson = {"email":"[email protected]","password":"kya63amari"}
+                try:
+                    print("Logging in...")
+                    access_token = requests.post("https://revisionbank.onrender.com/loginapi",json=loginjson).json()["access_token"]
+                    headers = {"Authorization": f"Bearer {access_token}"}
+                    print("Logged in.")
+                except Exception as ex:
+                    print("Login Failed.{}:{}".format(type(ex),ex))
+                try:
+                    print("Storing CaesarAI text...")
+                    response = requests.post("https://revisionbank.onrender.com/storerevisioncards",json=json,headers=headers).json()
+                    print("CaesarAI Stored.")
+                except Exception as ex:
+                    print("CaesarAI Text not stored.".format(type(ex),ex))
+                boole = False
+            elif sendrevisionbank == "n":
+                boole = False
+            else:
+                boole = True
+    def check_if_wav(self,argfilename):
+        folder = self.audio_input_folder
+        res = ""
+        for i in os.listdir(folder):
+            if argfilename in i :
+                res += i
+        if "wav" in res:
+            return True
+        else:
+            return False
+    def mp3_to_wav(self,src,dst):
+        folder = self.audio_input_folder
+        src = f"{folder}/{src}"
+        dst = f"{folder}/{dst}"
+        sound = AudioSegment.from_mp3(src)
+        sound.export(dst, format="wav")
+        os.remove(src)
+    def check_file_exists(self,argfilename):
+        folder =  self.notes_folder
+        if folder in os.listdir():
+            if f"{argfilename}.txt" in os.listdir(folder):
+                return True
+            else:
+                return False
+        else:
+            return True
+    def slice_sections(self,argfilename,largewav="large"):
+        filename = "{}/{}.wav".format(self.audio_input_folder,argfilename)
+        sound = AudioSegment.from_wav(filename)
+        if self.notes_folder not in os.listdir():
+            os.mkdir(self.notes_folder)
+        if largewav == "small":
+            sentences =""
+            txtfilename = "{}/{}.txt".format(self.notes_folder,argfilename)
+            with sr.AudioFile(filename) as source:
+                # listen for the data (load audio to memory)
+                audio_data = r.record(source)
+                # recognize (convert from speech to text)
+                text = r.recognize_google(audio_data)
+                #print(text)
+                with open(txtfilename,"w+") as f:
+                    f.write(text)
+                with open(txtfilename,"r") as f:
+                    text = f.read()
+                    textlist  = text.split("period")
+                    #print(textlist)
+                with open(txtfilename,"w+") as f:
+                    for t in textlist:
+                        sentence = f"{t.rstrip().lstrip()}.\n".capitalize()
+                        #print(sentence)
+                        sentences += f"{sentence}\n"
+                        f.write(sentence)
+                #print(textlist[0])
+            #self.send_revisionbank(sentences,txtfilename)
+            print(sentences)
+        if largewav == "large":
+            sentences = ""
+            duration  = sound.duration_seconds //60
+            # 7 seconds - 3 minutes
+            #print(duration)
+            minute_intervals = 0.5# 0.15 or 0.50 # TODO Try 1,2 and 3 and see which is the most optimized by seeing themost words/letters collected.
+            percentages = [i * (minute_intervals/duration) for i in range(0,int(duration//minute_intervals))]
+            #print(percentages)
+            #percentages = [i/20 for i in range(0,20)]# 0.8
+            # TODO Maximum audio time is 3.8 minutes, using percentage may be inconsistent if the audio duration increases.
+            slicedsections =  [ sound[round(percentages[i] * len(sound)):round(percentages[i+1] * len(sound))] for i in range(len(percentages)-1) ]
+            return slicedsections
+    def run_api(self,argfilename,dest,slicedsections,new_sound,verbose=0):
+            filename = "{}/{}.wav".format(self.audio_input_folder,argfilename)
+            txtfilename = "{}/{}.txt".format(self.notes_folder,argfilename)
+            folder_name = "audio-chunks"
+            print("Starting...")
+            for i, audio_chunk in enumerate(tqdm(slicedsections), start=1):
+                # create a drectory to store the audio chunks
+                if not os.path.isdir(folder_name):
+                    os.mkdir(folder_name)
+                print("Translating chunk{}.wav...".format(i))
+                chunk_filename = os.path.join(folder_name, "chunk{}.wav".format(i))
+                audio_chunk.export(chunk_filename, format="wav")
+                with sr.AudioFile(chunk_filename) as source:
+                    audio_listened = r.record(source)
+                    # try converting it to text
+                    try:
+                        text = r.recognize_google(audio_listened)
+                    except sr.UnknownValueError as e:
+                        print("Error:", str(e))
+                    else:
+                        text = "{}. ".format(text.capitalize())
+                        try:
+                            with open(txtfilename,"a+",encoding="utf-8") as f:
+                                f.write("{}\n".format(text))
+                            with open(txtfilename,"r",encoding="utf-8") as f:
+                                caesarnotesduplicate = f.readlines()
+                        except UnicodeEncodeError as uex:
+                            pass
+                        try:
+                            sim = cosine_similarity(caesarnotesduplicate[-1],caesarnotesduplicate[-2])
+                            if sim > 0.95:
+                                caesarnotesduplicate.remove(caesarnotesduplicate[-1])
+                                try:
+                                    with open(txtfilename,"w+",encoding="utf-8") as f:
+                                        for word in caesarnotesduplicate:
+                                            f.write(word)
+                                except UnicodeEncodeError as uex:
+                                    pass
+                        except IndexError as iex:
+                            pass
+                        try:
+                            print(chunk_filename, ":", text)
+                            translation,dest,origin,src = self.caesartrans.translate(text,dest)
+                            print(translation)
+                            self.caesartts.run_tts(argfilename,translation,dest)
+                            ttsfilename = f"{self.audio_output_folder}/{argfilename}.mp3"
+                            sound = AudioSegment.from_mp3(ttsfilename)
+                            new_sound += sound
+                            yield i,new_sound,src,text,translation
+                        except UnicodeEncodeError as uex:
+                            pass
+                try:
+                    shutil.rmtree('audio-chunks')
+                except FileNotFoundError as fex:
+                    pass
+    def clean_up_wav(self,argfilename):
+        try:
+            folder = self.audio_input_folder
+            os.remove(f"{folder}/{argfilename}.wav")
+            return True
+        except Exception as ex:
+            return False
+    def clean_up_txt(self,argfilename):
+        try:
+            folder = self.notes_folder
+            os.remove(f"{folder}/{argfilename}.txt")
+            return True
+        except Exception as ex:
+            return False
+    def load_transcription(self,argfilename):
+        with open(f"{self.notes_folder}/{argfilename}.txt") as f:
+            text = f.read()
+        return text
+    def load_audio(self,argfilename,fileformat,folder):
+        print(f"{folder}/{argfilename}.{fileformat}")
+        try:
+            with open(f"{folder}/{argfilename}.{fileformat}","rb") as f:
+                contents = f.read()
+            return contents
+        except FileNotFoundError as fex:
+            return False
+if __name__ == "__main__":
+    caesarmbtr = CaesarMobileTranscribe()
+    argfilename = "DIALOGUE_de"
+    dest = "fr"
+    ttsfilename_new = f"{caesarmbtr.audio_output_folder}/{argfilename}_new.mp3"
+    new_sound = AudioSegment.empty()
+    sliced_sections = caesarmbtr.slice_sections(argfilename)
+    for i,new_sound in caesarmbtr.run_api(argfilename,dest,sliced_sections,new_sound):
+        print(f"{i}:")
+    new_sound.export(ttsfilename_new, format="mp3")

CaesarMobileTranslate/__pycache__/caesarmobiletranslate.cpython-39.pyc ADDED Viewed

Binary file (1.1 kB). View file

CaesarMobileTranslate/caesarmobiletranslate.py ADDED Viewed

	@@ -0,0 +1,17 @@

+#https://thepythoncode.com/article/translate-text-in-python?utm_content=cmp-true
+from googletrans import Translator
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+class CaesarMobileTranslate(CaesarFolderInterface):
+    def __init__(self):
+        super().__init__()
+        self.translator = Translator()
+    def translate(self,text,dest,verbose=0):
+        # translate a spanish text to english text (by default)
+        translation = self.translator.translate(text,dest=dest)
+        if verbose == 1:
+            print(f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})")
+        return translation.text,translation.dest,translation.origin,translation.src
+if __name__ == "__main__":
+    caesarmobtrans = CaesarMobileTranslate()
+    caesarmobtrans.translate("Hola Mundo","fr")

CaesarSQLDB ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 03f2a8eaeb36e2499f577322db716add29d81371

Dockerfile ADDED Viewed

	@@ -0,0 +1,38 @@

+# Use the official Python 3.8 image
+FROM python:3.8
+# Update, upgrade and install in one layer so a cached "update" layer is never
+# reused with a later install.
+RUN apt-get -y update && apt-get -y upgrade && apt-get install -y ffmpeg
+RUN pip install --upgrade pip
+RUN pip install uvicorn
+# Set the working directory to /code
+WORKDIR /code
+#VOLUME /home/amari/Desktop/CaesarAI/CaesarFastAPI /code
+# Copy only the requirements file first so the dependency layer can be cached
+COPY ./requirements.txt /code/requirements.txt
+# Install requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+RUN python -m spacy download en_core_web_sm
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory.
+# PYTHONPATH is set with ENV: the earlier "RUN export PYTHONPATH=$PWD" only
+# affected the shell of that single build step and had no lasting effect.
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH \
+	PYTHONPATH=/home/user/app
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+# Local
+#CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860","--reload"]
+# Fly.io
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080","--reload"]

README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+title: CaesarAITranscribeTL
+emoji: 🏆
+colorFrom: red
+colorTo: gray
+sdk: docker
+pinned: false
+---

__pycache__/main.cpython-39.pyc ADDED Viewed

Binary file (5.38 kB). View file

caesarmobile.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from CaesarMobileTranscribe.caesartranscribe import CaesarMobileTranscribe
+from CaesarMobileTranslate.caesarmobiletranslate import CaesarMobileTranslate
+from CaesarMobileTTS.caesarmobiletts import CaesarMobileTTS
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+from CaesarSQLDB.caesar_create_tables import CaesarCreateTables
+from CaesarSQLDB.caesarcrud import CaesarCRUD
+from CaesarSQLDB.caesarhash import CaesarHash
+from pydub import AudioSegment
+if __name__ == "__main__":
+    # Scratch script: builds the helper objects, creates the database tables,
+    # checks whether a translation row exists for this file/language pair and
+    # writes an empty MP3 named "<argfilename>_start.mp3" to the output folder.
+    argfilename = "DIALOGUE" # "audio-sample-1" #
+    language = "fr"
+    caesarfolders = CaesarFolderInterface()
+    caesarmobtrb = CaesarMobileTranscribe()
+    caesarmobtrans = CaesarMobileTranslate()
+    caesarmobtts = CaesarMobileTTS()
+    caesarcrud = CaesarCRUD()
+    caesarcreatetables = CaesarCreateTables()
+    caesarcreatetables.create(caesarcrud)
+    # NOTE(review): fields and translation_exists are assigned but not used below.
+    fields = ("filename","src","dest","translationhash","original_transcript","translated_transcript","translated_audio_contents")
+    table = "translations"
+    # The lookup key is the hash of the file name concatenated with the language.
+    hash_input = argfilename + language
+    translationhash = CaesarHash.hash_text(hash_input)
+    condition = f"translationhash = '{translationhash}'"
+    # NOTE(review): ("*") is the plain string "*", not a one-element tuple — confirm what check_exists expects.
+    translation_exists = caesarcrud.check_exists(("*"),table,condition)
+    new = AudioSegment.empty()
+    new.export(f"{caesarfolders.audio_output_folder}/{argfilename}_start.mp3", format="mp3")

fly.toml ADDED Viewed

	@@ -0,0 +1,17 @@

+# fly.toml app configuration file generated for caesarmobiletranslateapis on 2023-09-09T00:29:27+01:00
+#
+# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
+#
+app = "caesarmobiletranslateapis"
+primary_region = "ams"
+[build]
+[http_service]
+  internal_port = 8080
+  force_https = true
+  auto_stop_machines = true
+  auto_start_machines = true
+  min_machines_running = 0
+  processes = ["app"]

main.py ADDED Viewed

	@@ -0,0 +1,158 @@

+#https://thepythoncode.com/article/translate-text-in-python?utm_content=cmp-true
+import asyncio
+import uvicorn
+from fastapi import FastAPI,UploadFile,Form,WebSocket,File
+from fastapi.responses import FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Any, Dict, AnyStr, List, Union
+import starlette
+from CaesarMobileTranslate.caesarmobiletranslate import CaesarMobileTranslate
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+from CaesarMobileTranscribe.caesartranscribe import CaesarMobileTranscribe
+from CaesarMobileTTS.caesarmobiletts import CaesarMobileTTS
+from CaesarSQLDB.caesar_create_tables import CaesarCreateTables
+from CaesarSQLDB.caesarhash import CaesarHash
+from pydub import AudioSegment
+import os
+app = FastAPI()
+caesarfolders = CaesarFolderInterface()
+caesarmobtrb = CaesarMobileTranscribe()
+caesarcreatetables = CaesarCreateTables()
+caesarmobtrb.create_all_dirs()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+class CaesarMobileTranslateReq(BaseModel):
+    text:str
+    dest:str
+JSONObject = Dict[AnyStr, Any]
+JSONArray = List[Any]
+JSONStructure = Union[JSONArray, JSONObject]
+@app.get("/") # POST # allow all origins all methods.
+async def home():
+    return "Hello world to Caesar Mobile Translate."
+@app.post("/caesarmobiletranslate") # POST # allow all origins all methods.
+async def caesarmobiletranslate(data : JSONStructure = None):
+    try:
+        data = dict(data)#request.get_json()
+        print(data)
+        translation,dest,original,src = caesarmobtrb.caesartrans.translate(data["text"],data["dest"])
+        return {"translation":translation,"dest":dest,"original":original,"src":src}
+    except Exception as ex:
+        return {"error":f"{type(ex)}-{ex}"}
+@app.post("/caesarmobiletranslatestoreaudio")
+async def caesarmobiletranslatestoreaudio(language:  str = Form(...),file: UploadFile = File(...)):
+    # Increase Upload filesize: https://stackoverflow.com/questions/73442335/how-to-upload-a-large-file-%E2%89%A53gb-to-fastapi-backend
+    filename = file.filename
+    fileformat = filename.split(".")[1]
+    suffix = f"_{language}"
+    argfilename = filename.replace(".mp3","").replace(".wav","") + suffix
+    contents = await file.read()
+    fields = ("filename","src","dest","translationhash","original_transcript","translated_transcript","translated_audio_contents")
+    table = "translations"
+    hash_input = argfilename.replace(suffix,'') + language
+    translationhash = CaesarHash.hash_text(hash_input)
+    condition = f"translationhash = '{translationhash}'"
+    caesarfolders.clean_all()
+    if f"{argfilename}.wav" not in os.listdir(caesarmobtrb.audio_input_folder):
+        store_res = caesarmobtrb.store_audio(argfilename,contents,fileformat)
+        if store_res:
+            return {"message":"audio stored in active directory."}
+        else:
+            return {"error":"Error storing."}
+    else:
+        return {"message":"translation already exists in db."}
+@app.websocket("/caesarmobiletranslateaudiows")
+async def caesarmobiletranslateaudio(websocket: WebSocket):
+    try:
+        await websocket.accept()
+        while True:
+            data = await websocket.receive_json()
+            filename = data["filename"]
+            language = data["language"]
+            fileformat = "wav"
+            suffix = f"_{language}"
+            argfilename = filename + suffix
+            print(argfilename)
+            fields = ("filename","src","dest","translationhash","original_transcript","translated_transcript","translated_audio_contents")
+            table = "translations"
+            hash_input = argfilename.replace(suffix,'') + language
+            translationhash = CaesarHash.hash_text(hash_input)
+            condition = f"translationhash = '{translationhash}'"
+            ttsfilename = f"{caesarmobtrb.audio_output_folder}/{argfilename}.mp3"
+            if f"{argfilename}.mp3" not in os.listdir(caesarmobtrb.audio_output_folder):
+                contents = caesarmobtrb.load_audio(argfilename,fileformat,caesarmobtrb.audio_input_folder)
+                if contents:
+                    new_sound = AudioSegment.empty()
+                    original_text = ""
+                    final_translation = ""
+                    send_interval = 3
+                    sliced_sections = caesarmobtrb .slice_sections(argfilename)
+                    for i,new_sound,dsrc,text,translation in caesarmobtrb.run_api(argfilename,language,sliced_sections,new_sound):
+                        original_text += f"{text}\n"
+                        final_translation += f"{translation}\n"
+                        new_sound.export(ttsfilename, format="mp3")
+                        current_contents = caesarmobtrb.load_audio(argfilename,"mp3",caesarmobtrb.audio_output_folder)
+                        await websocket.send_json({"progress":i,"total":len(sliced_sections),"send_audio_interval":send_interval})
+                        if i % send_interval == 0:
+                            await websocket.send_bytes(current_contents)
+                    new_sound.export(ttsfilename, format="mp3")
+                    final_contents = caesarmobtrb.load_audio(argfilename,"mp3",caesarmobtrb.audio_output_folder)
+                    await websocket.send_bytes(final_contents)
+                    # .encode('ascii')
+                    original_text = original_text.replace("\n","<new_line>",100000)
+                    original_text = original_text.encode('ascii',"ignore").decode()
+                    original_text = original_text.replace("<new_line>","\n",100000)
+                    final_translation = final_translation.replace("\n","<new_line>",100000)
+                    final_translation = final_translation.encode('ascii',"ignore").decode()
+                    final_translation = final_translation.replace("<new_line>","\n",100000)
+                    await websocket.send_json({"original_text":original_text})
+                    await websocket.send_json({"final_translation":final_translation})
+                    print({"message":"All translation audio was sent."})
+                    await websocket.send_json({"message":"All translation audio was sent."})
+                else:
+                    await websocket.send_json({"error":"error loading file in active directory send request to caesarmobiletranslatestoreaudio."})
+            else:
+                try:
+                    result = caesarmobtrb.load_audio(argfilename,"mp3",caesarmobtrb.audio_output_folder)
+                    await websocket.send_bytes(result)
+                except Exception as ex:
+                    await websocket.send_json({"error":"error getting file from active directory."})
+    except starlette.websockets.WebSocketDisconnect as wed:
+        if str(wed) == "1000":
+            caesarfolders.clean_all()
+            print("connected close handled.")
+        else:
+            print(type(wed),wed)
+async def main():
+    config = uvicorn.Config("main:app", port=7860, log_level="info",host="0.0.0.0",reload=True) # Local
+    server = uvicorn.Server(config)
+    await server.serve()
+if __name__ == "__main__":
+    asyncio.run(main())

main_db.py ADDED Viewed

	@@ -0,0 +1,176 @@

+#https://thepythoncode.com/article/translate-text-in-python?utm_content=cmp-true
+import asyncio
+import uvicorn
+from fastapi import FastAPI,UploadFile,Form,WebSocket
+from fastapi.responses import FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Any, Dict, AnyStr, List, Union
+import starlette
+from CaesarMobileTranslate.caesarmobiletranslate import CaesarMobileTranslate
+from CaesarFolderInterface.caesarfolderinterface import CaesarFolderInterface
+from CaesarMobileTranscribe.caesartranscribe import CaesarMobileTranscribe
+from CaesarSQLDB.caesarcrud import CaesarCRUD
+from CaesarMobileTTS.caesarmobiletts import CaesarMobileTTS
+from CaesarSQLDB.caesar_create_tables import CaesarCreateTables
+from CaesarSQLDB.caesarhash import CaesarHash
+from pydub import AudioSegment
+app = FastAPI()
+caesarfolders = CaesarFolderInterface()
+caesarmobtrb = CaesarMobileTranscribe()
+caesarcrud = CaesarCRUD()
+caesarcreatetables = CaesarCreateTables()
+caesarcreatetables.create(caesarcrud)
+caesarmobtrb.create_all_dirs()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+class CaesarMobileTranslateReq(BaseModel):
+    text:str
+    dest:str
+JSONObject = Dict[AnyStr, Any]
+JSONArray = List[Any]
+JSONStructure = Union[JSONArray, JSONObject]
+@app.get("/") # POST # allow all origins all methods.
+async def home():
+    return "Hello world to Caesar Mobile Translate."
+@app.post("/caesarmobiletranslate") # POST # allow all origins all methods.
+async def caesarmobiletranslate(data : JSONStructure = None):
+    try:
+        data = dict(data)#request.get_json()
+        print(data)
+        translation,dest,original,src = caesarmobtrb.caesartrans.translate(data["text"],data["dest"])
+        return {"translation":translation,"dest":dest,"original":original,"src":src}
+    except Exception as ex:
+        return {"error":f"{type(ex)}-{ex}"}
+@app.post("/caesarmobiletranslatestoreaudio")
+async def caesarmobiletranslatestoreaudio(file: UploadFile,language:  str = Form()):
+    # Increase Upload filesize: https://stackoverflow.com/questions/73442335/how-to-upload-a-large-file-%E2%89%A53gb-to-fastapi-backend
+    filename = file.filename
+    fileformat = filename.split(".")[1]
+    suffix = f"_{language}"
+    argfilename = filename.replace(".mp3","").replace(".wav","") + suffix
+    contents = await file.read()
+    fields = ("filename","src","dest","translationhash","original_transcript","translated_transcript","translated_audio_contents")
+    table = "translations"
+    hash_input = argfilename.replace(suffix,'') + language
+    translationhash = CaesarHash.hash_text(hash_input)
+    condition = f"translationhash = '{translationhash}'"
+    translation_exists = caesarcrud.check_exists(("*"),table,condition)
+    caesarfolders.clean_all()
+    if not translation_exists:
+        store_res = caesarmobtrb.store_audio(argfilename,contents,fileformat)
+        if store_res:
+            return {"message":"audio stored in active directory."}
+        else:
+            return {"error":"Error storing."}
+    else:
+        return {"message":"translation already exists in db."}
+@app.websocket("/caesarmobiletranslateaudiows")
+async def caesarmobiletranslateaudio(websocket: WebSocket):
+    try:
+        await websocket.accept()
+        while True:
+            data = await websocket.receive_json()
+            filename = data["filename"]
+            language = data["language"]
+            fileformat = "wav"
+            suffix = f"_{language}"
+            argfilename = filename + suffix
+            print(argfilename)
+            fields = ("filename","src","dest","translationhash","original_transcript","translated_transcript","translated_audio_contents")
+            table = "translations"
+            hash_input = argfilename.replace(suffix,'') + language
+            translationhash = CaesarHash.hash_text(hash_input)
+            condition = f"translationhash = '{translationhash}'"
+            translation_exists = caesarcrud.check_exists(("*"),table,condition)
+            ttsfilename = f"{caesarmobtrb.audio_output_folder}/{argfilename}.mp3"
+            if not translation_exists:
+                contents = caesarmobtrb.load_audio(argfilename,fileformat,caesarmobtrb.audio_input_folder)
+                if contents:
+                    new_sound = AudioSegment.empty()
+                    original_text = ""
+                    final_translation = ""
+                    send_interval = 3
+                    sliced_sections = caesarmobtrb .slice_sections(argfilename)
+                    for i,new_sound,dsrc,text,translation in caesarmobtrb.run_api(argfilename,language,sliced_sections,new_sound):
+                        original_text += f"{text}\n"
+                        final_translation += f"{translation}\n"
+                        new_sound.export(ttsfilename, format="mp3")
+                        current_contents = caesarmobtrb.load_audio(argfilename,"mp3",caesarmobtrb.audio_output_folder)
+                        await websocket.send_json({"progress":i,"total":len(sliced_sections),"send_audio_interval":send_interval})
+                        if i % send_interval == 0:
+                            await websocket.send_bytes(current_contents)
+                    new_sound.export(ttsfilename, format="mp3")
+                    final_contents = caesarmobtrb.load_audio(argfilename,"mp3",caesarmobtrb.audio_output_folder)
+                    await websocket.send_bytes(final_contents)
+                    # .encode('ascii')
+                    original_text = original_text.replace("\n","<new_line>",100000)
+                    original_text = original_text.encode('ascii',"ignore").decode()
+                    original_text = original_text.replace("<new_line>","\n",100000)
+                    final_translation = final_translation.replace("\n","<new_line>",100000)
+                    final_translation = final_translation.encode('ascii',"ignore").decode()
+                    final_translation = final_translation.replace("<new_line>","\n",100000)
+                    await websocket.send_json({"original_text":original_text})
+                    await websocket.send_json({"final_translation":final_translation})
+                    print({"message":"All translation audio was sent."})
+                    await websocket.send_json({"message":"All translation audio was sent."})
+                    # Store db.
+                    #print("src:",src)
+                    #res = caesarcrud.post_data(fields,(f"{argfilename.replace(suffix,'')}.mp3",src,languag,translationhash,original_text,final_translation,contents),table)
+                    #if res:
+                    #    await websocket.send_json({"message":"translation was stored."})
+                    #else:
+                    #    await websocket.send_json({"error":"translation was stored."})
+                else:
+                    await websocket.send_json({"error":"error loading file in active directory send request to caesarmobiletranslatestoreaudio."})
+            else:
+                res = caesarcrud.get_data(("filename","translated_audio_contents"),table,condition)
+                if res:
+                    resjson = res[0]
+                    store_result = caesarfolders.store_audio(argfilename,resjson["translated_audio_contents"])
+                    if store_result:
+                        await websocket.send_bytes(store_result)
+                    else:
+                       await websocket.send_json({"error":"error GET storing"})
+                else:
+                    await websocket.send_json({"error":"error whilst getting data,"})
+    except starlette.websockets.WebSocketDisconnect as wed:
+        if str(wed) == "1000":
+            caesarfolders.clean_all()
+            print("connected close handled.")
+        else:
+            print(type(wed),wed)
+async def main():
+    config = uvicorn.Config("main:app", port=7860, log_level="info",host="0.0.0.0",reload=True) # Local
+    server = uvicorn.Server(config)
+    await server.serve()
+# Start the server only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    asyncio.run(main())

python_version ADDED Viewed

	@@ -0,0 +1 @@


1	+ python=3.9

requirements.txt ADDED Viewed

	@@ -0,0 +1,58 @@

+annotated-types==0.5.0
+anyio==3.7.1
+blis==0.7.10
+catalogue==2.0.9
+certifi==2023.7.22
+cffi==1.15.1
+chardet==3.0.4
+charset-normalizer==3.2.0
+click==8.1.7
+confection==0.1.3
+cymem==2.0.7
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl#sha256=83276fc78a70045627144786b52e1f2728ad5e29e5e43916ec37ea9c26a11212
+exceptiongroup==1.1.3
+fastapi==0.103.1
+googletrans==3.1.0a0
+gTTS==2.3.2
+h11==0.9.0
+h2==3.2.0
+hpack==3.0.0
+hstspreload==2023.1.1
+httpcore==0.9.1
+httpx==0.13.3
+hyperframe==5.2.0
+idna==2.10
+Jinja2==3.1.2
+langcodes==3.3.0
+MarkupSafe==2.1.3
+murmurhash==1.0.9
+mysqlclient==2.2.0
+numpy==1.24.4
+packaging==23.1
+pathy==0.10.2
+preshed==3.0.8
+pycparser==2.21
+pydantic==2.3.0
+pydantic_core==2.6.3
+pydub==0.25.1
+python-dotenv==1.0.0
+python-multipart==0.0.6
+requests==2.31.0
+rfc3986==1.5.0
+smart-open==6.4.0
+sniffio==1.3.0
+soundfile==0.12.1
+spacy==3.6.1
+spacy-legacy==3.0.12
+spacy-loggers==1.0.4
+SpeechRecognition==3.10.0
+srsly==2.4.7
+starlette==0.27.0
+thinc==8.1.12
+tqdm==4.66.1
+typer==0.9.0
+typing_extensions==4.7.1
+urllib3==2.0.4
+uvicorn==0.23.2
+wasabi==1.1.2
+websockets==11.0.3

requirements_win.txt ADDED Viewed

	@@ -0,0 +1,90 @@

+# python:3.9
+fastapi
+uvicorn
+googletrans==3.1.0a0 # This version is IMPORTANT!!!
+pydantic
+altgraph==0.17.3
+asttokens==2.0.8
+backcall==0.2.0
+blis==0.7.8
+catalogue==2.0.8
+certifi==2022.9.24
+cffi==1.15.1
+charset-normalizer==2.1.1
+click==8.1.3
+colorama==0.4.5
+confection==0.0.3
+contourpy==1.0.5
+cycler==0.11.0
+cymem==2.0.6
+Cython==0.29.32
+debugpy==1.6.3
+decorator==5.1.1
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
+entrypoints==0.4
+executing==1.1.1
+ffmpeg==1.4
+fonttools==4.37.4
+future==0.18.2
+idna  # KEEP it like this!!!
+ipykernel==6.16.0
+ipython==8.5.0
+jedi==0.18.1
+Jinja2==3.1.2
+joblib==1.2.0
+jupyter_client==7.4.2
+jupyter_core==4.11.2
+kiwisolver==1.4.4
+langcodes==3.3.0
+MarkupSafe==2.1.1
+matplotlib==3.6.1
+matplotlib-inline==0.1.6
+murmurhash==1.0.8
+nest-asyncio==1.5.6
+nltk==3.7
+numpy==1.23.4
+packaging==21.3
+parso==0.8.3
+pathy==0.6.2
+pefile==2022.5.30
+pickleshare==0.7.5
+Pillow==9.2.0
+preshed==3.0.7
+prompt-toolkit==3.0.31
+psutil==5.9.3
+pure-eval==0.2.2
+PyAudio==0.2.12
+pycparser==2.21
+pydantic==1.9.2
+pydub==0.25.1
+Pygments==2.13.0
+pyinstaller==5.5
+pyinstaller-hooks-contrib==2022.10
+pyparsing==3.0.9
+python-dateutil==2.8.2
+pywin32==304
+pywin32-ctypes==0.2.0
+pyzmq==24.0.1
+regex==2022.9.13
+requests==2.28.1
+scipy==1.9.2
+six==1.16.0
+smart-open==5.2.1
+sounddevice==0.4.5
+soundfile==0.11.0
+spacy==3.4.1
+spacy-legacy==3.0.10
+spacy-loggers==1.0.3
+SpeechRecognition==3.8.1
+srsly==2.4.4
+stack-data==0.5.1
+thinc==8.1.4
+tinyaes==1.0.3
+tornado==6.2
+tqdm==4.64.1
+traitlets==5.5.0
+typer==0.4.2
+typing_extensions==4.4.0
+urllib3==1.26.12
+wasabi==0.10.1
+wcwidth==0.2.5