# Hugging Face Space: wasmdashai/wasm-spad (status: Sleeping)
# File size: 4,888 Bytes

import gradio as gr
import os
from transformers import AutoTokenizer,VitsModel

import google.generativeai as genai
import torch

# Secrets are read from environment variables:
#   id_gmkey -> API key handed to google.generativeai
#   key_     -> access token handed to from_pretrained for the speech repo
api_key = os.environ.get("id_gmkey")
token = os.environ.get("key_")
genai.configure(api_key=api_key)

# Run the speech model on the GPU when torch can see one, otherwise on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("asg2024/vits-ar-sa-huba", token=token)
# The checkpoint lives in the same repo as the tokenizer, so it has to be
# fetched with the same access token; the Gemini API key is not valid here.
model = VitsModel.from_pretrained("asg2024/vits-ar-sa-huba", token=token).to(device)


# Sampling settings passed to the Gemini model when it is constructed.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

# Bound to its own name on purpose: the module-level name `model` already
# refers to the VITS speech model that modelspeech() calls. Rebinding `model`
# here made speech synthesis invoke the Gemini client object instead.
chat_model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
    # safety_settings = Adjust safety settings
    # See https://ai.google.dev/gemini-api/docs/safety-settings
)


def create_chat_session():
    """Start a new Gemini chat session pre-seeded with example turns.

    The seeded history asks the model to always answer in the Saudi Najdi
    dialect and to keep answers short (about two lines), followed by model
    replies that acknowledge those instructions.

    Returns:
        The chat session object returned by ``chat_model.start_chat``.
    """
    chat_session = chat_model.start_chat(
        history=[
            {
                "role": "user",
                "parts": [
                    "السلام عليكم اريد منك ان ترد على اسئلتي  دائما باللهجة السعودية النجدية  \n\n",
                ],
            },
            {
                "role": "model",
                "parts": [
                    "هلا والله، إسأل ما في خاطرك وأنا حاضر أساعدك، بس بشرط واحد، أسئلتك تكون واضحة عشان أفهم عليك عدل وأعطيك الجواب الزين. قل وش تبي وأنا حاضر! \n",
                ],
            },
            {
                "role": "user",
                "parts": [
                    "كيف حالك اخبارك\n",
                ],
            },
            {
                "role": "model",
                "parts": [
                    "هلا والله وغلا، أنا طيب وبخير الحمد لله،  انت كيفك؟ عساك طيب؟ \n \n وش عندك أخبار؟ عسى كلها زينة.  \n",
                ],
            },
            {
                "role": "user",
                "parts": [
                    "اريد ايضا ان تكون اجابتك مختصره على سبيل المثال ااكثر اجابة سطرين\n",
                ],
            },
            {
                "role": "model",
                "parts": [
                    "خلاص، فهمتك. من عيوني، أسئلتك من اليوم وطالع أجوبتها ما تتعدى سطرين.  \n \n إسأل وشف! \n",
                ],
            },
        ]
    )
    return chat_session

# Module-level chat session used by get_answer_ai(); it is replaced with a
# fresh one whenever sending a message fails.
# NOTE(review): a single session object is shared by every caller of this
# module, so all requests appear to share one conversation — confirm intended.
AI = create_chat_session()


def get_answer_ai(text):
    """Send *text* to the current chat session and return the streamed reply.

    If the send raises, the session is rebuilt with create_chat_session() and
    the message is sent once more on the new session. An error raised by that
    second attempt propagates to the caller.

    Args:
        text: The user message to send.

    Returns:
        Whatever ``send_message(text, stream=True)`` returns; callers iterate
        over it to receive the reply chunk by chunk.
    """
    global AI
    try:
        return AI.send_message(text, stream=True)
    except Exception:
        # Only ordinary errors trigger the retry. A bare ``except`` would also
        # swallow KeyboardInterrupt / SystemExit and retry instead of exiting.
        AI = create_chat_session()
        return AI.send_message(text, stream=True)

def modelspeech(text):
    """Synthesize *text* and return ``(sampling_rate, waveform)``.

    The text is encoded with the module-level ``tokenizer``, the token ids are
    moved to ``device`` and passed to the module-level ``model``; the resulting
    ``waveform`` is copied to the CPU and flattened to a 1-D NumPy array.

    NOTE(review): this relies on the module-level name ``model`` referring to
    the speech model (callable, with ``config.sampling_rate``) at call time.

    Args:
        text: The text to speak.

    Returns:
        A tuple of ``model.config.sampling_rate`` and the flattened waveform.
    """
    encoded = tokenizer(text, return_tensors="pt")
    token_ids = encoded["input_ids"].to(device)

    # Inference only: no autograd bookkeeping is needed for the forward pass.
    with torch.no_grad():
        synthesis = model(input_ids=token_ids)

    samples = synthesis.waveform.cpu().numpy().reshape(-1)
    return model.config.sampling_rate, samples

import re
def clean_text(text):
    """Return *text* without symbols and with whitespace runs re-spaced.

    Every character that is neither a word character nor whitespace is
    removed, each run of whitespace is replaced by two spaces, and leading
    and trailing whitespace is trimmed from the result.
    """
    symbol_free = re.compile(r'[^\w\s]').sub('', text)
    respaced = re.compile(r'\s+').sub('  ', symbol_free)
    return respaced.strip()


def text_to_speech(text):
    """Stream the AI answer to *text* as synthesized audio segments.

    Answer chunks from dash() are buffered until the cleaned buffer is longer
    than ten characters; each such segment is synthesized with modelspeech()
    and yielded right away so playback can start before the answer is complete.

    Differences from a naive per-chunk approach, all deliberate:
      * Raw chunks are buffered and cleaned together, so a chunk that ends
        with a space is not glued to the first word of the next chunk.
      * Whatever is still buffered when the answer ends is synthesized too,
        so a short final fragment is not lost.
      * Nothing is synthesized when there is no speakable text at all.

    Args:
        text: The user's question.

    Yields:
        The value returned by modelspeech() for each segment.
    """
    min_chars = 10  # a segment is flushed once it is longer than this
    pending = ''

    for chunk in dash(text):
        pending += str(chunk)
        speakable = clean_text(pending)
        if len(speakable) > min_chars:
            pending = ''
            yield modelspeech(speakable)

    # Flush the tail that never crossed the length threshold.
    tail = clean_text(pending)
    if tail:
        yield modelspeech(tail)
def dash(text):
    """Yield the text of each streamed chunk of the AI answer to *text*.

    The question is sent through get_answer_ai() when iteration starts, and
    the ``.text`` of every chunk in the response is yielded in order.
    """
    yield from (piece.text for piece in get_answer_ai(text))


# demo = gr.Interface(fn=dash, inputs=["text"], outputs=['text'])
# demo.launch()

def _stream_answer_text(text):
    """Yield the answer accumulated so far after each chunk from dash().

    A value yielded by a generator event handler replaces the output
    component's current value, so yielding bare chunks would leave only the
    most recent chunk visible in the textbox.
    """
    answer = ''
    for piece in dash(text):
        answer += piece
        yield answer


# Two-tab UI: the first tab streams the answer as text, the second streams it
# as synthesized audio.
with gr.Blocks() as demo:
    with gr.Tab("AI Text  "):
        gr.Markdown("# Text to Speech")
        text_input = gr.Textbox(label="Enter Text")
        text_out = gr.Textbox()
        text_input.submit(_stream_answer_text, text_input, text_out)
    with gr.Tab("AI Speech"):
        gr.Markdown("# Text to Speech")
        text_input2 = gr.Textbox(label="Enter Text")
        audio_output = gr.Audio(streaming=True)
        text_input2.submit(text_to_speech, text_input2, audio_output)


# show_error=True surfaces handler exceptions in the browser UI.
demo.launch(show_error=True)