renator's picture
create table for text to speech and update the schemas for user creation as well
4b98fcb
raw
history blame
4.71 kB
import os
import uuid
import time
import logging # Import the logging module
import torch
from django.http import FileResponse
from rest_framework import status
from rest_framework.response import Response
from rest_framework.generics import CreateAPIView
from TTS.api import TTS
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated
from texttovoice.models import TextToSpeech
from .serializers import TextToSpeechSerializer
from rest_framework.parsers import MultiPartParser
from drf_yasg import openapi
from drf_yasg.utils import swagger_auto_schema
# Initialize logger at module level
logger = logging.getLogger(__name__)
class TextToSpeechCreateView(CreateAPIView):
    """Synthesize speech from text using an uploaded reference voice clip.

    The endpoint accepts a multipart form validated by
    ``TextToSpeechSerializer`` (the view reads the ``text``, ``speaker_wav``
    and ``language`` fields), runs the XTTS v2 model, records the request in
    ``TextToSpeech`` and streams the generated WAV back as an attachment.
    Token authentication is required.
    """

    serializer_class = TextToSpeechSerializer
    authentication_classes = [TokenAuthentication]  # Apply token authentication
    permission_classes = [IsAuthenticated]  # Require authentication for this view
    parser_classes = [MultiPartParser]

    # Directory for the short-lived copy of the uploaded speaker clip.
    _SPEAKER_TMP_DIR = '/tmp'
    # Chunk size used when streaming the generated audio to the client.
    _STREAM_CHUNK_SIZE = 64 * 1024

    @staticmethod
    def _remove_quietly(path):
        """Delete *path* on a best-effort basis, logging instead of raising."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up (e.g. synthesis failed before writing).
            pass
        except OSError:
            logger.warning("Could not delete temporary file %s", path, exc_info=True)

    @classmethod
    def _save_speaker_sample(cls, speaker_wav):
        """Copy the uploaded speaker clip to a uniquely named temporary file.

        A random prefix keeps concurrent requests that upload files with the
        same name from overwriting each other; the original base name is kept
        at the end so the file extension is preserved.

        Returns the path of the written file; the caller must delete it.
        """
        safe_name = os.path.basename(speaker_wav.name)
        path = os.path.join(cls._SPEAKER_TMP_DIR, f"{uuid.uuid4()}_{safe_name}")
        try:
            with open(path, "wb") as destination:
                for chunk in speaker_wav.chunks():
                    destination.write(chunk)
        except Exception:
            # Do not leave a partially written clip behind.
            cls._remove_quietly(path)
            raise
        return path

    @classmethod
    def _stream_then_delete(cls, file_name):
        """Yield the content of *file_name* in chunks, then delete the file.

        The deletion sits in ``finally`` so it also runs when the consumer
        stops early and closes the generator (e.g. a dropped connection),
        not only when the file has been read to the end.
        """
        try:
            with open(file_name, 'rb') as f:
                # Fixed-size reads: iterating a binary file splits on b"\n",
                # which yields arbitrarily sized chunks for audio data.
                yield from iter(lambda: f.read(cls._STREAM_CHUNK_SIZE), b"")
        finally:
            cls._remove_quietly(file_name)

    @swagger_auto_schema(
        operation_id='text_to_speech_create',
        operation_description=(
            'Generate speech for the given text in the voice of the uploaded '
            'speaker sample and return it as a WAV file'
        ),
        responses={
            status.HTTP_200_OK: openapi.Response(
                'Generated speech as a WAV attachment',
                schema=openapi.Schema(type=openapi.TYPE_FILE),
            ),
            status.HTTP_400_BAD_REQUEST: 'Validation errors keyed by field name',
        },
    )
    def create(self, request, *args, **kwargs):
        """Validate the request, synthesize the audio and stream it back.

        Returns:
            A ``FileResponse`` (``audio/wav`` attachment) on success, or a
            400 ``Response`` carrying ``serializer.errors`` when validation
            fails.

        Raises:
            Any exception raised while saving the upload, running the model
            or writing the database record propagates to the framework;
            temporary files created so far are removed first.
        """
        serializer = self.get_serializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        text = serializer.validated_data.get("text")
        speaker_wav = serializer.validated_data.get("speaker_wav")
        language = serializer.validated_data.get("language")
        output_filename = f"output_{uuid.uuid4()}.wav"

        start_time = time.time()
        try:
            # The model needs the reference clip as a path on disk.
            speaker_file_path = self._save_speaker_sample(speaker_wav)
            try:
                tts = TTS(
                    "tts_models/multilingual/multi-dataset/xtts_v2",
                    gpu=torch.cuda.is_available(),
                )
                tts.tts_to_file(
                    text=text,
                    file_path=output_filename,
                    speaker_wav=speaker_file_path,
                    language=language,
                )
            finally:
                # The temporary copy is only needed while the model runs.
                self._remove_quietly(speaker_file_path)

            end_time = time.time()
            processing_time = end_time - start_time

            TextToSpeech.objects.create(
                text=text,
                speaker_wav=speaker_wav,
                output_wav=output_filename,
                language=language,
                created_by=request.user,  # Assign the authenticated user here
            )
        except Exception:
            # Nothing will stream the output file, so remove it here.
            self._remove_quietly(output_filename)
            raise

        # The generator deletes the output file once it has been streamed.
        response = FileResponse(
            self._stream_then_delete(output_filename),
            as_attachment=True,
            content_type='audio/wav',
        )
        logger.info(
            "start time: %s , end time: %s and Processing time: %s seconds",
            start_time,
            end_time,
            processing_time,
        )
        return response