Spaces:

sasan
/

KITT

Build error

File size: 8,623 Bytes

5baa807

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text to Speech Playground"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import torch\n",
    "import gradio as gr\n",
    "from TTS.api import TTS\n",
    "os.environ[\"COQUI_TOS_AGREED\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import namedtuple\n",
    "\n",
    "# A speaker name plus one reference-clip path per emotion (None when no clip is set).\n",
    "# The typename matches the bound name so repr() and pickle resolve to `Voice`.\n",
    "Voice = namedtuple('Voice', ['name', 'neutral', 'sad', 'angry', 'happy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "voices = [\n",
    "    # Each emotion field holds its own reference clip; neutral.wav is the clip the\n",
    "    # synthesis cell further down clones from.\n",
    "    Voice('Rick', neutral='audio/rick/neutral.wav', sad=None, angry='audio/rick/angry.mp3', happy=None),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[voice(name='Rick', neutral='audio/rick/angry.mp3', sad=None, angry=None, happy=None)]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "voices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " > tts_models/multilingual/multi-dataset/xtts_v1.1 is already downloaded.\n",
      " > Using model: xtts\n"
     ]
    }
   ],
   "source": [
    "# Load the XTTS text-to-speech model, on the GPU when one is available.\n",
    "if torch.cuda.is_available():\n",
    "    device = \"cuda\"\n",
    "else:\n",
    "    device = \"cpu\"\n",
    "\n",
    "tts_pipelins = TTS(\"tts_models/multilingual/multi-dataset/xtts_v1.1\")\n",
    "tts_pipelins = tts_pipelins.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " > Text splitted to sentences.\n",
      "[\"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\"]\n",
      " > Processing time: 0.7903299331665039\n",
      " > Real-time factor: 0.11176741294459602\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'out.wav'"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smoke test: clone the voice from the neutral reference clip and write the result to out.wav.\n",
    "tts_pipelins.tts_to_file(\n",
    "    \"Hello, I am Rick, pickle rick, you took a wrong turn and now you're stuck in a parallel universe\",\n",
    "    speaker_wav=\"audio/rick/neutral.wav\",\n",
    "    emotion='neutral',\n",
    "    language='en',\n",
    "    file_path='out.wav',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def text_to_speech(voice, tts):\n",
    "    \"\"\"Return the `neutral` entry stored on `voice`; `tts` is accepted but not used.\"\"\"\n",
    "    neutral_clip = voice.neutral\n",
    "    return neutral_clip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Callback used by the Gradio interface (`fn=tts`): three inputs in, two outputs back.\n",
    "def tts(text, voice, state):\n",
    "    \"\"\"Synthesise `text` in the selected voice and return the audio file path.\n",
    "\n",
    "    Args:\n",
    "        text: Text to speak (value of the Textbox input).\n",
    "        voice: Name of an entry in `voices` (value of the Radio input).\n",
    "        state: Session state from `gr.State`; None until a value has been stored.\n",
    "\n",
    "    Returns:\n",
    "        (audio_path, state) - path of the generated wav file (None when `text`\n",
    "        is blank) and the state to carry into the next call.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `voice` does not name an entry in `voices`.\n",
    "    \"\"\"\n",
    "    if state is None:\n",
    "        state = {'context': ''}\n",
    "    if not text or not str(text).strip():\n",
    "        # Nothing to synthesise; leave the audio output empty.\n",
    "        return None, state\n",
    "\n",
    "    # Map the name picked in the UI back to its Voice record.\n",
    "    speaker = next((v for v in voices if v.name == voice), None)\n",
    "    if speaker is None:\n",
    "        raise ValueError(f'Unknown voice: {voice!r}')\n",
    "\n",
    "    audio_path = \"output.wav\"\n",
    "    tts_pipelins.tts_to_file(text=str(text),\n",
    "                             file_path=audio_path,\n",
    "                             speaker_wav=speaker.neutral,\n",
    "                             language='en',\n",
    "                             emotion='neutral')\n",
    "    return audio_path, state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# INTERFACE: TEXT TO AUDIO\n",
    "\n",
    "#to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/\n",
    "#in \"Insecure origins treated as secure\", enable it and relaunch chrome\n",
    "\n",
    "\n",
    "model_answer = ''\n",
    "general_context = ''\n",
    "# Initial per-session state. It is handed to the input gr.State below so the\n",
    "# callback receives this dict, not None, on the first interaction.\n",
    "initial_state = {'context': general_context}\n",
    "initial_context = initial_state['context']\n",
    "\n",
    "# Create the Gradio interface. `fn` is called with (text, voice name, state) and\n",
    "# must return (audio, state) to match the inputs/outputs declared here.\n",
    "iface = gr.Interface(\n",
    "    fn=tts,\n",
    "    inputs=[\n",
    "        gr.Textbox(value=initial_context, visible=True),\n",
    "        gr.Radio(choices=[x.name for x in voices], label='Choose a voice', value=voices[0].name, show_label=True),  # Radio button for voice selection\n",
    "        gr.State(value=initial_state)  # Keeps track of the context state across interactions.\n",
    "    ],\n",
    "    outputs=[\n",
    "        gr.Audio(label = 'output audio'),\n",
    "        gr.State()\n",
    "    ]\n",
    ")\n",
    "# Close any interface that is still open so the port is available.\n",
    "gr.close_all()\n",
    "# Launch the interface (debug=True keeps this cell running until interrupted).\n",
    "iface.launch(debug=True, share=True, server_name=\"0.0.0.0\", server_port=7860, ssl_verify=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}