Falln87 committed
Commit 52f4685 · verified · 1 Parent(s): 0b14976

Create app.py

Files changed (1)
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
+ import base64
+ import cv2
+ import numpy as np
+ import streamlit as st
+ import torch
+ from io import BytesIO
+ from diffusers import AudioLDMPipeline, StableDiffusionPipeline
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ # Load Stable Diffusion pipeline for generating images.
+ pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
+
+ # Stable Diffusion cannot synthesise audio; AudioLDM is a text-to-audio
+ # diffusion pipeline that ships with diffusers, used here for the audio section.
+ audio_pipe = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2")
+
+ # Load BERT model for the chatbot. bert-base-cased has no fine-tuned
+ # classification head, so its predictions here are placeholders.
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+ model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
+
+ def generate_audio(text):
+     # AudioLDM returns a list of float32 numpy waveforms sampled at 16 kHz.
+     audio = audio_pipe(text, num_inference_steps=10, audio_length_in_s=5.0).audios[0]
+     audio = audio / np.max(np.abs(audio))  # normalise to [-1, 1]
+     return audio.astype(np.float32)
+
+ def generate_image(text):
+     # The pipeline already returns a PIL.Image; no tensor conversion is needed.
+     return pipe(text).images[0]
+
+ def generate_video(text, frames=30, width=512, height=512):
+     # Naive text-to-video: sample `frames` independent images for the same
+     # prompt and write them out with OpenCV. The frames are not temporally
+     # coherent, since Stable Diffusion has no video mode.
+     fps = 10
+     writer = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*"mp4v"),
+                              fps, (width, height))
+     with torch.no_grad():
+         for _ in range(int(frames)):
+             img = pipe(text, height=height, width=width).images[0]
+             # PIL images are RGB; OpenCV expects BGR.
+             writer.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
+     writer.release()
+     return "output.mp4"
+
+
+ def main():
+     st.title("Streamlit App with Diffusers and Transformers")
+     st.header("Generate Audio, Video, and Images using Diffusers")
+     st.header("Chatbot using BERT")
+
+     # Audio generation section
+     with st.form("audio_form"):
+         text_input = st.text_input("Enter text for audio generation:")
+         submit_button = st.form_submit_button("Generate Audio")
+         if submit_button:
+             audio_output = generate_audio(text_input)
+             st.write("Generated Audio:")
+             # st.audio plays a raw numpy waveform directly when given its
+             # sample rate, so no base64/WAV round-trip is needed.
+             st.audio(audio_output, sample_rate=16000)
+
+     # Image generation section
+     with st.form("image_form"):
+         text_input = st.text_input("Enter text for image generation:")
+         submit_button = st.form_submit_button("Generate Image")
+         if submit_button:
+             image_output = generate_image(text_input)
+             st.image(image_output, caption="Generated Image", use_column_width=True)
+             # A PIL image must be serialised to bytes before base64 encoding.
+             buffer = BytesIO()
+             image_output.save(buffer, format="PNG")
+             image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+             st.write(f"Generated Image (base64): {image_base64}")
+
+     # Video generation section
+     with st.form("video_form"):
+         text_input = st.text_input("Enter text for video generation:")
+         frames = st.number_input("Number of frames:", value=30, step=1)
+         width = st.number_input("Image width:", value=512, step=1)
+         height = st.number_input("Image height:", value=512, step=1)
+         submit_button = st.form_submit_button("Generate Video")
+         if submit_button:
+             video_path = generate_video(text_input, int(frames), int(width), int(height))
+             st.write("Generated Video:")
+             st.video(video_path)
+
+     # Chatbot section
+     with st.form("chat_form"):
+         user_input = st.text_area("Enter your message:", height=100)
+         submit_button = st.form_submit_button("Send Message")
+         if submit_button:
+             inputs = tokenizer(user_input, padding=True, return_tensors="pt")
+             with torch.no_grad():
+                 outputs = model(**inputs)  # the BatchEncoding must be unpacked with **
+             prediction = torch.argmax(outputs.logits, dim=-1).item()
+             # The argmax is a class index, not a token id, so it cannot be
+             # decoded into text; report it as a label instead.
+             st.write(f"Assistant Response: predicted class {prediction}")
+
+     st.write("Streamlit App with Diffusers and Transformers")
+     st.write("Generated by FallnAI")
+
+
+ if __name__ == "__main__":
+     main()