Spaces:

undetectable
/

voice-clone

Configuration error

App Files Files Community

voice-clone / texttovoice /views.py

renator

updates

e2882d6 over 1 year ago

raw

history blame contribute delete

6.95 kB

	from datetime import timedelta
	import os
	import uuid
	import torch
	from rest_framework import status
	from rest_framework.response import Response
	from rest_framework.generics import CreateAPIView,ListAPIView
	from TTS.api import TTS # Ensure this import is correct based on your TTS library/package
	from rest_framework.authentication import TokenAuthentication
	from rest_framework.permissions import IsAuthenticated
	from texttovoice.models import TextToSpeech
	from .serializers import TextToSpeechSerializer, TextToSpeechSerializerResponse ,TextToSpeechSerializerResponseWithURL # Ensure this import matches your file structure
	from rest_framework.parsers import MultiPartParser
	from drf_yasg.utils import swagger_auto_schema
	from drf_yasg import openapi
	from rest_framework.exceptions import NotFound as NOT_FOUND
	from .minio_utils import get_minio_client # Ensure this import matches your file structure

	# Module-level MinIO client shared by every view in this file. It is built
	# once, when this module is imported, by the project's `get_minio_client`.
	minio_client = get_minio_client()

	# Bucket that uploaded speaker samples and generated audio are written to
	# (used by `upload_file_to_minio` for both the upload and the pre-signed URL).
	BUCKET_NAME = "voice-clone"

	class TextToSpeechCreateView(CreateAPIView):
	    """Synthesize speech from text in the voice of an uploaded speaker sample.

	    The request is multipart form data validated by ``TextToSpeechSerializer``;
	    the view reads ``text``, ``speaker_wav`` (an uploaded file) and ``language``
	    from the validated data. The speaker sample and the generated WAV are both
	    uploaded to MinIO, a ``TextToSpeech`` row is recorded for the requesting
	    user, and the response carries pre-signed URLs for the two objects.
	    """

	    serializer_class = TextToSpeechSerializer
	    authentication_classes = [TokenAuthentication]
	    permission_classes = [IsAuthenticated]
	    parser_classes = [MultiPartParser]

	    # The request form fields are derived from `serializer_class`; only the
	    # operation metadata and the responses are declared by hand.
	    @swagger_auto_schema(
	        operation_id='Create text-to-speech audio',
	        operation_description=(
	            'Generate speech for the given text and language, cloning the voice '
	            'from the uploaded speaker WAV file'
	        ),
	        responses={
	            status.HTTP_201_CREATED: openapi.Response(
	                'Created; speaker_wav and output_wav are pre-signed URLs',
	                TextToSpeechSerializerResponse,
	            ),
	            status.HTTP_400_BAD_REQUEST: 'Validation errors',
	            status.HTTP_500_INTERNAL_SERVER_ERROR: 'An error occurred processing the request',
	        },
	    )
	    def create(self, request, *args, **kwargs):
	        """Validate the upload, run TTS, store both files and return the record.

	        Returns:
	            201 with the serialized record (``speaker_wav`` / ``output_wav``
	            replaced by pre-signed URLs), 400 with the serializer errors when
	            validation fails, or 500 with a generic error message when any
	            step of the processing raises.
	        """
	        serializer = self.get_serializer(data=request.data)
	        if not serializer.is_valid():
	            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

	        # Bound before the `try` so the `finally` clause can always reference
	        # them, even when the failure happens before the paths are computed.
	        speaker_file_path = None
	        output_filename = None
	        try:
	            gpu_available = torch.cuda.is_available()
	            text = serializer.validated_data.get("text")
	            speaker_wav = serializer.validated_data.get("speaker_wav")
	            language = serializer.validated_data.get("language")

	            # Temporary file paths. The client-supplied file name is reduced
	            # to its base name so it can never escape /tmp.
	            speaker_file_path = os.path.join(
	                '/tmp', f"{uuid.uuid4()}{os.path.basename(speaker_wav.name)}"
	            )
	            output_filename = os.path.join('/tmp', f"{uuid.uuid4()}.wav")

	            # Save speaker WAV file
	            with open(speaker_file_path, "wb") as destination:
	                for chunk in speaker_wav.chunks():
	                    destination.write(chunk)

	            # TTS processing
	            # NOTE(review): the model is instantiated on every request, which
	            # is likely expensive — consider caching it if that is safe here.
	            tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
	            tts.tts_to_file(
	                text=text,
	                file_path=output_filename,
	                speaker_wav=speaker_file_path,
	                language=language,
	            )

	            # Upload both files to MinIO (local copies are removed in `finally`)
	            public_url, speaker_wav_path = self.upload_file_to_minio(speaker_file_path, 'speakers/')
	            public_url_output, output_wav_path = self.upload_file_to_minio(output_filename, 'output/')

	            # Create DB entry
	            tts_instance = TextToSpeech.objects.create(
	                text=text,
	                speaker_wav=speaker_wav_path,
	                output_wav=output_wav_path,
	                language=language,
	                created_by=request.user,
	            )

	            # Serialize the created instance, replacing the stored object
	            # paths with the pre-signed URLs.
	            response_serializer = TextToSpeechSerializerResponse(tts_instance)
	            response_data = {
	                **response_serializer.data,
	                "speaker_wav": public_url,
	                "output_wav": public_url_output,
	            }

	            return Response(response_data, status=status.HTTP_201_CREATED)
	        except Exception as e:
	            # Top-level boundary: report the failure and answer with a generic
	            # 500 so internal details are not leaked to the client.
	            print("Error due to ", e)
	            return Response(
	                {"error": "An error occurred processing your request."},
	                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            )
	        finally:
	            # Ensure cleanup happens on success and on failure alike
	            self.cleanup_files([speaker_file_path, output_filename])

	    def upload_file_to_minio(self, file_path, prefix):
	        """Upload a local file to MinIO and return ``(pre_signed_url, stored_path)``.

	        Args:
	            file_path: Path of the local file to upload.
	            prefix: Object-name prefix inside the bucket (e.g. ``'speakers/'``).

	        Returns:
	            A tuple of the pre-signed GET URL (valid for one day) and the
	            ``"<bucket>/<object_name>"`` path that is stored in the database.
	        """
	        file_name = os.path.basename(file_path)
	        object_name = f"{prefix}{file_name}"
	        with open(file_path, "rb") as file_data:
	            minio_client.put_object(BUCKET_NAME, object_name, file_data, os.path.getsize(file_path))

	        # Generate a pre-signed URL for the uploaded object
	        pre_signed_url = minio_client.presigned_get_object(BUCKET_NAME, object_name, expires=timedelta(days=1))
	        return pre_signed_url, f"{BUCKET_NAME}/{object_name}"

	    def cleanup_files(self, file_paths):
	        """Remove the given files from the filesystem, best effort.

	        Unset (``None``/empty) entries and files that do not exist are skipped
	        silently; any other OS error is printed and does not stop the loop.
	        """
	        for file_path in file_paths:
	            if not file_path:
	                # Path was never computed because processing failed early.
	                continue
	            try:
	                os.remove(file_path)
	            except FileNotFoundError:
	                # The file was never written (e.g. TTS failed) — nothing to do.
	                pass
	            except OSError as e:
	                print(e)

	class TextToSpeechListView(ListAPIView):
	    """List the text-to-speech records created by the authenticated user."""

	    serializer_class = TextToSpeechSerializerResponseWithURL
	    authentication_classes = [TokenAuthentication]
	    permission_classes = [IsAuthenticated]

	    def get_queryset(self):
	        """Return only the records whose ``created_by`` is the requesting user."""
	        return TextToSpeech.objects.filter(created_by=self.request.user)

	    def list(self, request, *args, **kwargs):
	        """Return every record of the current user, or 404 when there are none.

	        Raises:
	            NotFound: when the user has no text-to-speech records.
	        """
	        queryset = self.get_queryset()

	        if not queryset.exists():
	            raise NOT_FOUND('No text-to-speech data found for the current user.')

	        # The view is handed to the serializer through its context;
	        # presumably the serializer calls `generate_presigned_url` on it to
	        # build the URLs — verify against the serializer.
	        serializer = self.get_serializer(queryset, many=True, context={'view': self})
	        return Response(serializer.data, status=status.HTTP_200_OK)

	    def generate_presigned_url(self, object_path):
	        """Return a one-hour pre-signed GET URL for ``object_path``, or ``None``.

	        Args:
	            object_path: Stored path in the form ``"bucket_name/object_name"``;
	                it is split on the first ``/`` only, so the object name may
	                itself contain slashes.

	        Returns:
	            The pre-signed URL, or ``None`` if the path cannot be split or the
	            MinIO call fails (the error is printed).
	        """
	        try:
	            bucket, object_name = object_path.split('/', 1)
	            presigned_url = minio_client.presigned_get_object(bucket, object_name, expires=timedelta(hours=1))
	            return presigned_url
	        except Exception as e:
	            # Best effort: a missing URL must not fail the whole listing.
	            print(e)
	            return None