Spaces:

peteralexandercharles
/

testsite

Configuration error

App Files Files Community

peteralexandercharles committed on Jan 9, 2023

Commit

98d3f44

1 Parent(s): 8514371

Upload 11 files

Browse files

Files changed (11) hide show

COPYING +22 -0
Dockerfile +32 -0
README.md +33 -11
align.py +60 -0
install.sh +11 -0
install_deps.sh +18 -0
install_language_model.sh +3 -0
install_models.sh +17 -0
pylintrc +2 -0
serve.py +274 -0
setup.py +20 -0

COPYING ADDED Viewed

	@@ -0,0 +1,22 @@

+The MIT License (MIT)
+Copyright (c) 2015 Robert M Ochshorn
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

# Image for the Gentle forced-aligner web server (serve.py, port 8765).
FROM ubuntu:18.04

# System toolchain and runtime packages.
# DEBIAN_FRONTEND has to be *exported*: a bare `VAR=value && cmd` only sets a
# shell-local variable that apt-get (a child process) never sees, so package
# configuration prompts would not actually be suppressed.
RUN export DEBIAN_FRONTEND=noninteractive && \
	apt-get update && \
	apt-get install -y \
		gcc g++ gfortran \
		libc++-dev \
		libstdc++-6-dev zlib1g-dev \
		automake autoconf libtool \
		git subversion \
		libatlas3-base \
		nvidia-cuda-dev \
		ffmpeg \
		python3 python3-dev python3-pip \
		python python-dev python-pip \
		wget unzip && \
	apt-get clean

# Build the native code in ext/ first so that this expensive layer is reused
# when only the rest of the source tree changes.
ADD ext /gentle/ext
RUN export MAKEFLAGS=' -j8' &&  cd /gentle/ext && \
	./install_kaldi.sh && \
	make depend && make && rm -rf kaldi *.o

# Add the remaining sources, install the Python package and fetch the models.
ADD . /gentle
RUN cd /gentle && python3 setup.py develop
RUN cd /gentle && ./install_models.sh

EXPOSE 8765

VOLUME /gentle/webdata

CMD cd /gentle && python3 serve.py

README.md CHANGED Viewed

@@ -1,11 +1,33 @@
----
-title: Testsite
-emoji: 🌖
-colorFrom: indigo
-colorTo: blue
-sdk: docker
-pinned: false
-license: cc-by-nc-sa-3.0
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Gentle
+**Robust yet lenient forced-aligner built on Kaldi. A tool for aligning speech with text.**
+## Getting Started
+There are three ways to install Gentle.
+1. Download the [pre-built Mac application](https://github.com/lowerquality/gentle/releases/latest). This package includes a GUI that will start the server and a browser. It only works on Mac OS.
+2. Use the [Docker](https://www.docker.com/) image. Just run ```docker run -P lowerquality/gentle```. This works on all platforms supported by Docker.
+3. Download the source code and run ```./install.sh```. Then run ```python3 serve.py``` to start the server. This works on Mac and Linux.
+## Using Gentle
+By default, the aligner listens at http://localhost:8765. That page has a graphical interface for transcribing audio, viewing results, and downloading data.
+There is also a REST API so you can use Gentle in your programs. Here's an example of how to use the API with CURL:
+```bash
+curl -F "audio=@audio.mp3" -F "transcript=@words.txt" "http://localhost:8765/transcriptions?async=false"
+```
+If you've downloaded the source code you can also run the aligner as a command line program:
+```bash
+git clone https://github.com/lowerquality/gentle.git
+cd gentle
+./install.sh
+python3 align.py audio.mp3 words.txt
+```
+The default behaviour outputs the JSON to stdout.  See `python3 align.py --help` for options.

align.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import argparse
+import logging
+import multiprocessing
+import os
+import sys
+import gentle
# Command-line entry point: align a transcript to an audio file and emit the
# alignment as JSON, either to a file (-o/--output) or to stdout.
parser = argparse.ArgumentParser(
        description='Align a transcript to audio by generating a new language model.  Outputs JSON')
parser.add_argument(
        '--nthreads', default=multiprocessing.cpu_count(), type=int,
        help='number of alignment threads')
parser.add_argument(
        '-o', '--output', metavar='output', type=str,
        help='output filename')
parser.add_argument(
        '--conservative', dest='conservative', action='store_true',
        help='conservative alignment')
parser.set_defaults(conservative=False)
parser.add_argument(
        '--disfluency', dest='disfluency', action='store_true',
        help='include disfluencies (uh, um) in alignment')
parser.set_defaults(disfluency=False)
parser.add_argument(
        '--log', default="INFO",
        help='the log level (DEBUG, INFO, WARNING, ERROR, or CRITICAL)')
parser.add_argument(
        'audiofile', type=str,
        help='audio file')
parser.add_argument(
        'txtfile', type=str,
        help='transcript text file')
args = parser.parse_args()

log_level = args.log.upper()
logging.getLogger().setLevel(log_level)

# Filler words handed to the aligner as the `disfluencies` set.
disfluencies = set(['uh', 'um'])


def on_progress(p):
    """Log every key/value pair of a progress report at DEBUG level."""
    for k, v in p.items():
        logging.debug("%s: %s", k, v)


with open(args.txtfile, encoding="utf-8") as fh:
    transcript = fh.read()

resources = gentle.Resources()
logging.info("converting audio to 8K sampled wav")

with gentle.resampled(args.audiofile) as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(resources, transcript, nthreads=args.nthreads, disfluency=args.disfluency, conservative=args.conservative, disfluencies=disfluencies)
    result = aligner.transcribe(wavfile, progress_cb=on_progress, logging=logging)

json_text = result.to_json(indent=2)
if args.output:
    # The context manager guarantees the file is flushed and closed even if
    # the write fails part-way.
    with open(args.output, 'w', encoding="utf-8") as fh:
        fh.write(json_text)
    logging.info("output written to %s", args.output)
else:
    sys.stdout.write(json_text)

install.sh ADDED Viewed

	@@ -0,0 +1,11 @@

#!/bin/bash
# Full source install: fetch submodules, install dependencies, build Kaldi,
# download the models, then build the native code in ext/.
# Expects to be started from the repository root; aborts on the first failure.

set -e

# Register and check out the submodules in a single step.
git submodule update --init

./install_deps.sh

# Run the Kaldi build in a subshell so the working directory is unchanged
# for the steps that follow.
(
	cd ext
	./install_kaldi.sh
)

./install_models.sh

cd ext
make depend
make

install_deps.sh ADDED Viewed

	@@ -0,0 +1,18 @@

#!/bin/bash
# Install the OS packages Gentle depends on, then install the Python package
# in development mode. setup.py is referenced relatively, so run this from
# the directory that contains it.

set -e

echo "Installing dependencies..."

# Install OS-specific dependencies
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
	# The trailing wildcard also accepts variants such as "linux-gnueabihf".
	apt-get update -qq
	apt-get install -y zlib1g-dev automake autoconf git \
		libtool subversion libatlas3-base python3-pip \
		python3-dev wget unzip python3
	# A failed ffmpeg install is tolerated; the user is told to install it
	# manually. printf is used because bash's `echo` does not expand "\n"
	# without -e, so the original message printed the escapes literally.
	apt-get install -y ffmpeg || printf '\n\nYou have to install ffmpeg from a PPA or from https://ffmpeg.org before you can run gentle\n\n'
	python3 setup.py develop
elif [[ "$OSTYPE" == "darwin"* ]]; then
	brew install ffmpeg libtool automake autoconf wget python3
	sudo python3 setup.py develop
else
	# Previously this case was skipped silently; keep going (dependencies may
	# have been installed by hand) but say so.
	echo "Unrecognised OS type '$OSTYPE': no dependencies were installed." 1>&2
fi

install_language_model.sh ADDED Viewed

	@@ -0,0 +1,3 @@

#!/bin/bash
# Download and unpack the language-model archive (aspire-hclg) into the
# current directory, then delete the archive.

# Stop at the first failure so that a broken or missing download is not
# passed on to tar and rm.
set -e

ARCHIVE="aspire-hclg.tar.gz"

# -c resumes a partially downloaded archive instead of starting over.
wget -c "https://lowerquality.com/gentle/$ARCHIVE"
tar -xzvf "$ARCHIVE"
rm "$ARCHIVE"

install_models.sh ADDED Viewed

	@@ -0,0 +1,17 @@

#!/bin/bash
# Download the Kaldi model bundle for the pinned version and unpack it into
# the current directory.

set -e

VERSION="0.03"

# download_models VERSION
# Fetch kaldi-models-VERSION.zip, unzip it in place and remove the archive.
download_models() {
	local version="$1"
	local filename="kaldi-models-$version.zip"
	local url="https://lowerquality.com/gentle/$filename"
	# Quote every expansion so the values are never word-split or globbed.
	wget -O "$filename" "$url"
	unzip "$filename"
	rm "$filename"
}

echo "Downloading models for v$VERSION..." 1>&2
download_models "$VERSION"

pylintrc ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [MESSAGES CONTROL]
2	+ disable=locally-disabled

serve.py ADDED Viewed

	@@ -0,0 +1,274 @@

+from twisted.web.static import File
+from twisted.web.resource import Resource
+from twisted.web.server import Site, NOT_DONE_YET
+from twisted.internet import reactor, threads
+from twisted.web._responses import FOUND
+import json
+import logging
+import multiprocessing
+import os
+import shutil
+import uuid
+import wave
+from gentle.util.paths import get_resource, get_datadir
+from gentle.util.cyst import Insist
+import gentle
class TranscriptionStatus(Resource):
    """Twisted resource that answers GET with a status dict encoded as JSON."""

    def __init__(self, status_dict):
        Resource.__init__(self)
        # The dict is stored by reference, so whatever it contains at request
        # time is what gets serialised.
        self.status_dict = status_dict

    def render_GET(self, req):
        """Return the JSON-encoded status dict as the response body."""
        req.setHeader(b"Content-Type", "application/json")
        body = json.dumps(self.status_dict)
        return body.encode()
class Transcriber():
    """Runs alignment / transcription jobs and tracks their progress.

    Every job is identified by a ``uid`` and keeps its files in
    ``<data_dir>/transcriptions/<uid>``.  Progress is published through a
    per-job status dict obtained from ``get_status``.
    """

    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        """Load the shared resources and the full transcriber.

        data_dir: root directory under which job directories are created.
        nthreads: thread count passed to ``gentle.ForcedAligner``.
        ntranscriptionthreads: thread count passed to
            ``gentle.FullTranscriber``.
        """
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()

        self.full_transcriber = gentle.FullTranscriber(self.resources, nthreads=ntranscriptionthreads)
        self._status_dicts = {}

    def get_status(self, uid):
        """Return the mutable status dict for ``uid``, creating it if needed."""
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        """Return the directory that holds the files of job ``uid``."""
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        """Return a fresh 8-character hex id whose job directory is unused."""
        uid = None
        # Check the directory the job will really use (out_dir); the data
        # root itself never contains uid-named entries.
        while uid is None or os.path.exists(self.out_dir(uid)):
            uid = uuid.uuid4().hex[:8]
        return uid

    def transcribe(self, uid, transcript, audio, async_mode, **kwargs):
        """Run one job to completion and write its result files.

        uid: job id; the job directory (see ``out_dir``) must already exist.
        transcript: transcript text; when blank, full transcription is used
            if a language model is available.
        audio: raw bytes of the uploaded media file.
        async_mode: accepted for interface compatibility; not used here.
        kwargs: extra keyword arguments for ``gentle.ForcedAligner``.

        Returns the result object from the aligner/transcriber, or ``None``
        when the job could not be run (the status dict then has status
        ``'ERROR'`` and an ``'error'`` message).  Exceptions raised by the
        aligner are recorded in the status dict and re-raised.
        """
        status = self.get_status(uid)

        status['status'] = 'STARTED'

        outdir = self.out_dir(uid)

        # Text files are written as UTF-8 explicitly so that non-ASCII
        # transcripts do not depend on the process locale.
        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w', encoding='utf-8') as tranfile:
            tranfile.write(transcript)
        upload_path = os.path.join(outdir, 'upload')
        with open(upload_path, 'wb') as uploadfile:
            uploadfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(upload_path, wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w', encoding='utf-8') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        with wave.open(wavfile, 'rb') as wav_obj:
            status['duration'] = wav_obj.getnframes() / float(wav_obj.getframerate())
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            # Mirror every progress field into the job's status dict.
            print(p)
            for k, v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources, transcript, nthreads=self.nthreads, **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error']  = 'No transcript provided and no language model for full transcription'
            return

        try:
            output = trans.transcribe(wavfile, progress_cb=on_progress, logging=logging)
        except Exception as exc:
            # Without this the status would stay at TRANSCRIBING forever and
            # status pollers could never learn that the job died.
            status['status'] = 'ERROR'
            status['error'] = str(exc)
            raise

        # ...remove the original upload
        os.unlink(upload_path)

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w', encoding='utf-8') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w', encoding='utf-8') as csvfile:
            csvfile.write(output.to_csv())

        # Inline the alignment into the index.html file.
        with open(get_resource('www/view_alignment.html'), encoding='utf-8') as template:
            htmltxt = template.read()
        # The JSON contains user-supplied transcript text.  "</" can only
        # occur inside JSON strings, where "<\/" is an equivalent escape, so
        # this keeps a literal "</script>" from ending the inline script.
        inline_json = output.to_json().replace('</', '<\\/')
        htmltxt = htmltxt.replace("var INLINE_JSON;", "var INLINE_JSON=%s;" % (inline_json))
        with open(os.path.join(outdir, 'index.html'), 'w', encoding='utf-8') as htmlfile:
            htmlfile.write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output
class TranscriptionsController(Resource):
    """HTTP front end for transcription jobs.

    POST creates a job and either redirects to its page (default) or, with
    ``async=false``, keeps the request open and answers with the result
    JSON.  Child paths serve the files of an individual job.
    """

    def __init__(self, transcriber):
        Resource.__init__(self)
        self.transcriber = transcriber

    @staticmethod
    def _is_safe_uid(uid):
        """Return True if ``uid`` is a single path component.

        URL segments reach getChild already percent-decoded, so a segment
        can contain path separators; joining such a value into a filesystem
        path would escape the transcriptions directory.
        """
        return uid not in ('.', '..') and os.path.basename(uid) == uid

    @staticmethod
    def _bad_request(req, message):
        """Set a 400 response on ``req`` and return ``message`` as the body."""
        req.setResponseCode(400)
        req.setHeader(b"Content-Type", b"text/plain; charset=utf-8")
        return message.encode()

    def getChild(self, uid, req):
        """Return a file resource for job ``uid`` with a live status.json."""
        try:
            uid_text = uid.decode()
        except UnicodeDecodeError:
            return Resource.getChild(self, uid, req)
        if not self._is_safe_uid(uid_text):
            return Resource.getChild(self, uid, req)

        out_dir = self.transcriber.out_dir(uid_text)
        trans_ctrl = File(out_dir)

        # Add a Status endpoint to the file
        trans_status = TranscriptionStatus(self.transcriber.get_status(uid_text))
        trans_ctrl.putChild(b"status.json", trans_status)

        return trans_ctrl

    def render_POST(self, req):
        """Create a job from the form fields and start it on the thread pool.

        Fields: ``audio`` (required), ``transcript``, ``disfluency``,
        ``conservative``, and ``async`` (``false`` selects the blocking
        mode).  Responds 400 when ``audio`` is missing or the transcript is
        not valid UTF-8.
        """
        try:
            tran = req.args.get(b'transcript', [b''])[0].decode()
        except UnicodeDecodeError:
            return self._bad_request(req, "transcript must be UTF-8 encoded text")
        if not req.args.get(b'audio'):
            return self._bad_request(req, "missing required form field: audio")
        audio = req.args[b'audio'][0]

        uid = self.transcriber.next_id()

        kwargs = {'disfluency': b'disfluency' in req.args,
                  'conservative': b'conservative' in req.args,
                  'disfluencies': set(['uh', 'um'])}

        async_mode = True
        if b'async' in req.args and req.args[b'async'][0] == b'false':
            async_mode = False

        # We need to make the transcription directory here, so that
        # when we redirect the user we are sure that there's a place
        # for them to go.
        outdir = self.transcriber.out_dir(uid)
        os.makedirs(outdir)

        # Copy over the HTML
        shutil.copy(get_resource('www/view_alignment.html'), os.path.join(outdir, 'index.html'))

        result_promise = threads.deferToThreadPool(
            reactor, reactor.getThreadPool(),
            self.transcriber.transcribe,
            uid, tran, audio, async_mode, **kwargs)

        if not async_mode:
            # Becomes non-empty once the client connection is lost, so that
            # nothing is written to a dead request.
            client_gone = []

            def send_json(code, payload):
                if client_gone:
                    return
                req.setResponseCode(code)
                req.setHeader("Content-Type", "application/json")
                req.write(payload.encode())
                req.finish()

            def write_result(result):
                '''Write JSON to client on completion'''
                if result is None:
                    # transcribe() returns None when the job could not run;
                    # the reason is recorded in the job's status dict.
                    send_json(500, json.dumps(self.transcriber.get_status(uid)))
                else:
                    send_json(200, result.to_json(indent=2))

            def write_failure(failure):
                '''Answer with an error instead of leaving the request open'''
                if client_gone:
                    return
                message = failure.getErrorMessage()
                logging.error("transcription %s failed: %s", uid, message)
                send_json(500, json.dumps({'status': 'ERROR', 'error': message}))

            def on_disconnect(_):
                client_gone.append(True)
                result_promise.cancel()

            result_promise.addCallback(write_result)
            result_promise.addErrback(write_failure)

            req.notifyFinish().addErrback(on_disconnect)
            return NOT_DONE_YET

        req.setResponseCode(FOUND)
        req.setHeader(b"Location", "/transcriptions/%s" % (uid))
        return b''
class LazyZipper(Insist):
    """Zip archive of one transcription directory, stored at ``<cachedir>/<uid>.zip``.

    NOTE(review): presumably ``Insist`` calls ``serialize_computation`` when
    the file has to be produced; confirm in gentle.util.cyst.
    """

    def __init__(self, cachedir, transcriber, uid):
        self.transcriber = transcriber
        self.uid = uid
        zip_path = os.path.join(cachedir, '%s.zip' % (uid))
        Insist.__init__(self, zip_path)

    def serialize_computation(self, outpath):
        """Archive the transcription directory of ``self.uid`` for ``outpath``."""
        # `shutil.make_archive` appends ".zip" itself, so it is given the
        # target path with everything from the last "." onwards removed.
        archive_base = outpath.rpartition('.')[0]
        source_dir = os.path.join(self.transcriber.out_dir(self.uid))
        shutil.make_archive(archive_base, "zip", source_dir)
class TranscriptionZipper(Resource):
    """Serves ``<uid>.zip`` children by creating a LazyZipper for each job."""

    def __init__(self, cachedir, transcriber):
        self.cachedir = cachedir
        self.transcriber = transcriber
        Resource.__init__(self)

    def getChild(self, path, req):
        """Return a LazyZipper for the job named by ``path``.

        The uid is the part of ``path`` before the first dot.  Anything that
        is empty or is not a single path component is rejected: URL segments
        arrive percent-decoded, so they can contain path separators, and an
        empty uid would select the whole transcriptions directory.
        """
        try:
            uid = path.decode().split('.')[0]
        except UnicodeDecodeError:
            return Resource.getChild(self, path, req)
        if not uid or os.path.basename(uid) != uid:
            return Resource.getChild(self, path, req)

        t_dir = self.transcriber.out_dir(uid)
        if os.path.exists(t_dir):
            # TODO: Check that "status" is complete and only create a LazyZipper if so
            # Otherwise, we could have incomplete transcriptions that get permanently zipped.
            # For now, a solution will be hiding the button in the client until it's done.
            lz = LazyZipper(self.cachedir, self.transcriber, uid)
            # `path` is bytes here (it was decoded above), as putChild expects.
            self.putChild(path, lz)
            return lz
        return Resource.getChild(self, path, req)
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0, nthreads=4, ntranscriptionthreads=2, data_dir=get_datadir('webdata')):
    """Build the resource tree and run the Twisted reactor.

    port / interface: address the HTTP server listens on.
    installSignalHandlers: forwarded to ``reactor.run``.
    nthreads / ntranscriptionthreads: forwarded to ``Transcriber``.
    data_dir: web root; created if missing, together with its ``zip``
        subdirectory.

    Does not return until the reactor stops.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    zip_dir = os.path.join(data_dir, 'zip')
    for required_dir in (data_dir, zip_dir):
        if not os.path.exists(required_dir):
            os.makedirs(required_dir)

    # Static pages bundled with the application, layered over the data dir.
    root = File(data_dir)
    static_children = (
        (b'', 'www/index.html'),
        (b'status.html', 'www/status.html'),
        (b'preloader.gif', 'www/preloader.gif'),
    )
    for child_name, resource_path in static_children:
        root.putChild(child_name, File(get_resource(resource_path)))

    transcriber = Transcriber(data_dir, nthreads=nthreads, ntranscriptionthreads=ntranscriptionthreads)
    root.putChild(b'transcriptions', TranscriptionsController(transcriber))
    root.putChild(b'zip', TranscriptionZipper(zip_dir, transcriber))

    site = Site(root)
    logging.info("about to listen")
    reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")

    reactor.run(installSignalHandlers=installSignalHandlers)
if __name__=='__main__':
    # Script entry point: read the command-line options, configure logging
    # and start the server.
    import argparse

    cli = argparse.ArgumentParser(
        description='Align a transcript to audio by generating a new language model.')
    cli.add_argument(
        '--host', default="0.0.0.0",
        help='host to run http server on')
    cli.add_argument(
        '--port', default=8765, type=int,
        help='port number to run http server on')
    cli.add_argument(
        '--nthreads', default=multiprocessing.cpu_count(), type=int,
        help='number of alignment threads')
    cli.add_argument(
        '--ntranscriptionthreads', default=2, type=int,
        help='number of full-transcription threads (memory intensive)')
    cli.add_argument(
        '--log', default="INFO",
        help='the log level (DEBUG, INFO, WARNING, ERROR, or CRITICAL)')
    opts = cli.parse_args()

    # The level name is upper-cased so that e.g. "debug" is accepted too.
    logging.getLogger().setLevel(opts.log.upper())

    logging.info('gentle %s', gentle.__version__)
    logging.info('listening at %s:%d\n', opts.host, opts.port)

    serve(
        opts.port,
        opts.host,
        nthreads=opts.nthreads,
        ntranscriptionthreads=opts.ntranscriptionthreads,
        installSignalHandlers=1,
    )

setup.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from setuptools import setup
+from gentle import __version__
# Settings handed to setup() under the 'py2app' options key.
PY2APP_OPTIONS = {
    'argv_emulation': False,
    'resources': 'k3,m3,ffmpeg,www,exp',
}

setup(
    # Package metadata
    name='gentle',
    version=__version__,
    description='Robust yet lenient forced-aligner built on Kaldi.',
    url='http://lowerquality.com/gentle',
    author='Robert M Ochshorn',
    license='MIT',
    # Contents and requirements
    packages=['gentle'],
    install_requires=['twisted'],
    test_suite='tests',
    # Application bundling
    app=['serve.py'],
    data_files=[],
    options={'py2app': PY2APP_OPTIONS},
)