Create app.py
app.py
ADDED
@@ -0,0 +1,109 @@
import streamlit as st
import base64
import numpy as np
import torch
import cv2  # OpenCV, used by generate_video below
from io import BytesIO
from diffusers import StableDiffusionPipeline, AudioLDMPipeline
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification

# Stable Diffusion pipeline for image generation (the v2-1 checkpoint is
# published under the stabilityai organization).
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")

# Stable Diffusion itself cannot produce audio; AudioLDM is a text-to-audio
# diffusion model that can.
audio_pipe = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2")

# BERT model for the chat section. bert-base-cased is an encoder with a
# freshly initialized classification head: it predicts a class index and is
# not a generative chatbot.
config = AutoConfig.from_pretrained("bert-base-cased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", config=config)
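
# Optional sketch (an assumption, not part of the original app): move the
# diffusion pipelines to a GPU when one is available; generation on CPU is
# very slow.
if torch.cuda.is_available():
    pipe = pipe.to("cuda")
    audio_pipe = audio_pipe.to("cuda")
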
def generate_audio(text):
    # AudioLDM returns a list of 1-D float numpy waveforms sampled at 16 kHz.
    audio = audio_pipe(text, num_inference_steps=10, audio_length_in_s=5.0).audios[0]
    audio = audio.astype(np.float32)
    # Normalize by the absolute peak so the waveform stays in [-1, 1].
    audio = audio / np.max(np.abs(audio))
    return audio

def generate_image(text):
    # The pipeline already returns PIL images; no tensor conversion is needed.
    image = pipe(text).images[0]
    return image

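# Usage sketch (illustrative values, not tuned for this app): the pipeline
# call also accepts sampler settings, e.g.
#   pipe(text, num_inference_steps=30, guidance_scale=7.5).images[0]
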
def generate_video(text, frames=30, width=512, height=512, fps=10):
    # Generate one image per frame from the prompt, write the frames to an
    # AVI file with OpenCV, and return the file path for st.video.
    # fps controls the playback speed of the written file.
    writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (width, height))
    with torch.no_grad():
        for _ in range(frames):
            img = pipe(text).images[0].resize((width, height))
            # PIL images are RGB; OpenCV expects BGR.
            writer.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
    writer.release()
    return "output.avi"
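
# Sketch (an assumption): as written, every frame is an independent random
# sample from the same prompt. Passing a seeded generator per frame makes the
# output reproducible, e.g.
#   gen = torch.Generator().manual_seed(i)
#   img = pipe(text, generator=gen).images[0]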

def main():
    st.title("Streamlit App with Diffusers and Transformers")
    st.header("Generate Audio, Video, and Images using Diffusers")
    st.header("Chatbot using BERT")

    # Audio generation section
    with st.form("audio_form"):
        text_input = st.text_input("Enter text for audio generation:")
        submit_button = st.form_submit_button("Generate Audio")
        if submit_button:
            audio_output = generate_audio(text_input)
            st.write("Generated Audio:")
            # st.audio can play a float waveform directly when given its
            # sample rate (16 kHz for AudioLDM); no base64 round-trip needed.
            st.audio(audio_output, sample_rate=16000)

    # Image generation section
    with st.form("image_form"):
        text_input = st.text_input("Enter text for image generation:")
        submit_button = st.form_submit_button("Generate Image")
        if submit_button:
            image_output = generate_image(text_input)
            st.image(image_output, caption="Generated Image", use_column_width=True)
            # A PIL image must be serialized to bytes before base64-encoding.
            buffer = BytesIO()
            image_output.save(buffer, format="PNG")
            image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
            st.write(f"Generated Image (base64 PNG, truncated): {image_base64[:64]}...")

    # Video generation section
    with st.form("video_form"):
        text_input = st.text_input("Enter text for video generation:")
        frames = st.number_input("Number of frames:", value=30, step=1)
        width = st.number_input("Image width:", value=512, step=1)
        height = st.number_input("Image height:", value=512, step=1)
        submit_button = st.form_submit_button("Generate Video")
        if submit_button:
            video_output = generate_video(text_input, int(frames), int(width), int(height))
            st.write("Generated Video:")
            st.video(video_output)

    # Chatbot section
    with st.form("chat_form"):
        user_input = st.text_area("Enter your message:", height=100)
        submit_button = st.form_submit_button("Send Message")
        if submit_button:
            inputs = tokenizer(user_input, padding=True, return_tensors="pt")
            with torch.no_grad():
                outputs = model(**inputs)
            prediction = torch.argmax(outputs.logits, dim=-1).item()
            # A classification head outputs a class index, not token ids, so
            # tokenizer.decode() would only map it to an unrelated vocabulary
            # entry. Report the predicted class instead.
            st.write(f"Assistant response (predicted class): {prediction}")
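
    # Sketch (an assumption, not part of this app): a generative chatbot would
    # use a causal LM with its matching tokenizer instead, e.g.
    #   from transformers import AutoModelForCausalLM, AutoTokenizer
    #   chat_tok = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
    #   chat_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
    #   ids = chat_tok.encode(user_input + chat_tok.eos_token, return_tensors="pt")
    #   reply_ids = chat_model.generate(ids, max_length=100)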

    st.write("Streamlit App with Diffusers and Transformers")
    st.write("Generated by FallnAI")


if __name__ == "__main__":
    main()
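
# To run locally (sketch; the dependency list is an assumption):
#   pip install streamlit torch diffusers transformers accelerate opencv-python
#   streamlit run app.py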