radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 committed on Jul 6, 2024

Commit

3bac656

verified ·

1 Parent(s): 22f3c9f

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -50

app.py CHANGED Viewed

@@ -631,11 +631,55 @@ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
 from langchain.agents import Tool, initialize_agent
 from huggingface_hub import login
-def install_parler_tts():
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/parler-tts.git"])
-# Call the function to install parler-tts
-install_parler_tts()
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
@@ -889,7 +933,7 @@ def bot(history, choice, tts_model):
         if tts_model == "ElevenLabs":
             audio_future = executor.submit(generate_audio_elevenlabs, response)
         else:
-            audio_future = executor.submit(generate_audio_parler_tts, response)
         for character in response:
             history[-1][1] += character
@@ -1109,50 +1153,6 @@ def generate_audio_elevenlabs(text):
         logging.error(f"Error generating audio: {response.text}")
         return None
-def generate_audio_parler_tts(text):
-    model_id = 'parler-tts/parler_tts_mini_v0.1'
-    device = "cuda:0" if torch.cuda.is_available() else "cpu"
-    try:
-        model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(device)
-    except Exception as e:
-        print(f"Error loading Parler TTS model: {e}")
-        return None
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
-    try:
-        input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
-        prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
-    except Exception as e:
-        print(f"Error tokenizing input: {e}")
-        return None
-    max_input_length = model.config.n_positions - input_ids.shape[1]
-    segments = [prompt_input_ids[0][i:i+max_input_length] for i in range(0, prompt_input_ids.shape[1], max_input_length)]
-    audio_segments = []
-    for segment in segments:
-        segment = segment.unsqueeze(0)
-        try:
-            generation = model.generate(input_ids=input_ids, prompt_input_ids=segment)
-        except Exception as e:
-            print(f"Error generating audio segment: {e}")
-            return None
-        audio_arr = generation.cpu().numpy().squeeze()
-        audio_segments.append(audio_arr)
-    full_audio = np.concatenate(audio_segments)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-        sf.write(f.name, full_audio, model.config.sampling_rate)
-        temp_audio_path = f.name
-    logging.debug(f"Audio saved to {temp_audio_path}")
-    return temp_audio_path
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
@@ -1185,7 +1185,7 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
-            tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "Parler TTS"], value="Parler TTS")
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")
@@ -1226,3 +1226,4 @@ demo.launch(share=True)

 from langchain.agents import Tool, initialize_agent
 from huggingface_hub import login
+from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
+from string import punctuation
+device = "cuda:0" if torch.cuda.is_available() else "cpu"  # use the first CUDA device when torch reports one, otherwise CPU
+repo_id = "parler-tts/parler-tts-mini-expresso"  # checkpoint id shared by the model, tokenizer and feature extractor below
+model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)  # module-level: loaded once when this file is executed
+tokenizer = AutoTokenizer.from_pretrained(repo_id)  # used for both the voice description and the spoken prompt in generate_audio
+feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)  # only read here for its sampling rate
+SAMPLE_RATE = feature_extractor.sampling_rate  # sample rate passed to sf.write when saving generated audio
+SEED = 42  # fixed seed handed to set_seed() before every generation
+number_normalizer = EnglishNumberNormalizer()  # callable applied to the text at the start of preprocess()
+def preprocess(text):
+    text = number_normalizer(text).strip()
+    if text[-1] not in punctuation:
+        text = f"{text}."
+    abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
+    def separate_abb(chunk):
+        chunk = chunk.replace(".", "")
+        print(chunk)
+        return " ".join(chunk)
+    abbreviations = re.findall(abbreviations_pattern, text)
+    for abv in abbreviations:
+        if abv in text:
+            text = text.replace(abv, separate_abb(abv))
+    return text
+def generate_audio(text, description="Thomas speaks with emphasis and excitement at a moderate pace with high quality."):
+    inputs = tokenizer(description, return_tensors="pt").to(device)
+    prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
+    set_seed(SEED)
+    generation = model.generate(input_ids=inputs.input_ids, prompt_input_ids=prompt.input_ids)
+    audio_arr = generation.cpu().numpy().squeeze()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+        sf.write(f.name, audio_arr, SAMPLE_RATE)
+        temp_audio_path = f.name
+    logging.debug(f"Audio saved to {temp_audio_path}")
+    return temp_audio_path
 # Check if the token is already set in the environment variables
 hf_token = os.getenv("HF_TOKEN")
         if tts_model == "ElevenLabs":
             audio_future = executor.submit(generate_audio_elevenlabs, response)
         else:
+            audio_future = executor.submit(generate_audio, response)  # Updated function call
         for character in response:
             history[-1][1] += character
         logging.error(f"Error generating audio: {response.text}")
         return None
 # Stable Diffusion setup
 pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 pipe = pipe.to("cuda")
             chatbot = gr.Chatbot([], elem_id="RADAR:Channel 94.1", bubble_full_width=False)
             choice = gr.Radio(label="Select Style", choices=["Details", "Conversational"], value="Conversational")
+            tts_choice = gr.Radio(label="Select TTS Model", choices=["ElevenLabs", "New TTS Model"], value="New TTS Model")
             gr.Markdown("<h1 style='color: red;'>Talk to RADAR</h1>", elem_id="voice-markdown")
             chat_input = gr.Textbox(show_copy_button=True, interactive=True, show_label=False, label="ASK Radar !!!")