File size: 3,028 Bytes
46afb8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89d7cb0
 
46afb8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f291f2f
46afb8f
 
 
 
 
 
 
 
 
f291f2f
6808d52
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import whisper
import gradio as gr 
import time
import warnings
import torch
import os
import openai
from PIL import Image
import requests
from io import BytesIO
import time

# Timestamp captured at app startup (not referenced elsewhere in this file).
time_now = time.time()

 
# openai.api_key = '' # your api key
# API key comes from the environment; raises KeyError if "api_key" is unset.
openai.api_key = os.environ["api_key"]
# Load the multilingual Whisper "base" model once at import time
# (first run downloads the weights).
model = whisper.load_model("base")
# model = whisper.load_model("medium.en")
def transcribe(audio, text, langauge):
    """Turn a spoken (or typed) topic into a poem plus a matching image.

    Parameters
    ----------
    audio : str | None
        Filepath of the recorded clip (Gradio ``type="filepath"``), or
        ``None`` when the user typed the topic instead of speaking.
    text : str
        Fallback topic typed by the user; used only when ``audio`` is None.
    langauge : str
        Target language for the poem.  NOTE(review): the misspelled
        parameter name is kept on purpose so the external interface is
        unchanged.

    Returns
    -------
    list
        ``[topic_text, poem_text, PIL.Image]`` matching the three Gradio
        output components.
    """
    if audio is None:
        # No recording: use the typed topic verbatim.
        result_text = text
    else:
        # Load audio and pad/trim it to Whisper's fixed 30-second window.
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)

        # Make the log-Mel spectrogram and move it to the model's device.
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        # Decode the audio; fp16=False keeps decoding CPU-friendly.
        # (The original also ran model.detect_language(mel) here but
        # discarded the result — dead work, removed.)
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)
        result_text = result.text

    # Fix: the original prompt ran words together
    # ("...{result_text}in {langauge}language"); add spaces so the model
    # receives a well-formed instruction.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"write a full poem on {result_text} in {langauge} language",
        temperature=0.7,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    out_result = response.choices[0].text

    # One 512x512 illustration for the same topic.
    response = openai.Image.create(
        prompt=f" {result_text}" + "animated 4k",
        n=1,
        size="512x512",  # "1024x1024" also works but is slower/costlier
    )
    image_url = response['data'][0]['url']

    # Fix: download with a timeout and an explicit status check so a bad
    # URL fails loudly instead of hanging or yielding a broken image.
    response = requests.get(image_url, timeout=60)
    response.raise_for_status()
    out_image = Image.open(BytesIO(response.content))

    return [result_text, out_result, out_image]

# Output components: transcript, generated poem, generated image
# (order matches the list returned by transcribe).
output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="GPT-3 Davinci Output")
output_3 = gr.Image(label="DallE Output")

# Consistency fix: the inputs used the deprecated gr.inputs.Audio namespace
# while every other component here uses the modern direct style
# (gr.Textbox / gr.Image); use gr.Audio with the same arguments.
gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            label="use whisper tell app topic of poem ,  use text input below if you have problem with mic",
        ),
        gr.Textbox(label="poem on(Use english word mostly)"),
        gr.Textbox(label="language"),
    ],
    outputs=[output_1, output_2, output_3],
    # Same title string as before; the original wrapped it in a pointless
    # "" + '...' + "" concatenation.
    title='Children of heaven🌸🏡: Create Beautiful multilingual Poems with Relevant Images',
    description="Children of heaven🌸🏡is a web app that uses artificial intelligence to generate beautiful multilingual  poems and relevant images. With its powerful language GPT3 model, it can create unique and inspiring multilingual poems on a wide range of childrens' topics, and its Dall E model   creates images that perfectly complement the poem. Give children of heaven   a try and discover the magic of multilingual poetry and art. Whether you're a   professional or kid , this app is sure to spark your creativity and inspire you to create something beautiful.this is research work kindly report any bias in content in community section ",
).launch(debug=True)