Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,154 +1,178 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
-
model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
|
11 |
-
|
12 |
-
if torch.cuda.is_available():
|
13 |
-
torch_dtype = torch.float16
|
14 |
-
else:
|
15 |
-
torch_dtype = torch.float32
|
16 |
-
|
17 |
-
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
18 |
-
pipe = pipe.to(device)
|
19 |
-
|
20 |
-
MAX_SEED = np.iinfo(np.int32).max
|
21 |
-
MAX_IMAGE_SIZE = 1024
|
22 |
-
|
23 |
-
|
24 |
-
# @spaces.GPU #[uncomment to use ZeroGPU]
|
25 |
-
def infer(
|
26 |
-
prompt,
|
27 |
-
negative_prompt,
|
28 |
-
seed,
|
29 |
-
randomize_seed,
|
30 |
-
width,
|
31 |
-
height,
|
32 |
-
guidance_scale,
|
33 |
-
num_inference_steps,
|
34 |
-
progress=gr.Progress(track_tqdm=True),
|
35 |
-
):
|
36 |
-
if randomize_seed:
|
37 |
-
seed = random.randint(0, MAX_SEED)
|
38 |
|
39 |
-
generator = torch.Generator().manual_seed(seed)
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
negative_prompt=negative_prompt,
|
44 |
-
guidance_scale=guidance_scale,
|
45 |
-
num_inference_steps=num_inference_steps,
|
46 |
-
width=width,
|
47 |
-
height=height,
|
48 |
-
generator=generator,
|
49 |
-
).images[0]
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
"An astronaut riding a green horse",
|
57 |
-
"A delicious ceviche cheesecake slice",
|
58 |
-
]
|
59 |
-
|
60 |
-
css = """
|
61 |
-
#col-container {
|
62 |
-
margin: 0 auto;
|
63 |
-
max-width: 640px;
|
64 |
-
}
|
65 |
-
"""
|
66 |
-
|
67 |
-
with gr.Blocks(css=css) as demo:
|
68 |
-
with gr.Column(elem_id="col-container"):
|
69 |
-
gr.Markdown(" # Text-to-Image Gradio Template")
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
)
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
90 |
)
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
)
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
step=32,
|
116 |
-
value=1024, # Replace with defaults that work for your model
|
117 |
-
)
|
118 |
-
|
119 |
-
with gr.Row():
|
120 |
-
guidance_scale = gr.Slider(
|
121 |
-
label="Guidance scale",
|
122 |
-
minimum=0.0,
|
123 |
-
maximum=10.0,
|
124 |
-
step=0.1,
|
125 |
-
value=0.0, # Replace with defaults that work for your model
|
126 |
-
)
|
127 |
-
|
128 |
-
num_inference_steps = gr.Slider(
|
129 |
-
label="Number of inference steps",
|
130 |
-
minimum=1,
|
131 |
-
maximum=50,
|
132 |
-
step=1,
|
133 |
-
value=2, # Replace with defaults that work for your model
|
134 |
-
)
|
135 |
-
|
136 |
-
gr.Examples(examples=examples, inputs=[prompt])
|
137 |
-
gr.on(
|
138 |
-
triggers=[run_button.click, prompt.submit],
|
139 |
-
fn=infer,
|
140 |
-
inputs=[
|
141 |
-
prompt,
|
142 |
-
negative_prompt,
|
143 |
-
seed,
|
144 |
-
randomize_seed,
|
145 |
-
width,
|
146 |
-
height,
|
147 |
-
guidance_scale,
|
148 |
-
num_inference_steps,
|
149 |
-
],
|
150 |
-
outputs=[result, seed],
|
151 |
-
)
|
152 |
-
|
153 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
demo.launch()
|
|
|
1 |
+
# ----------------------------------------------------------------------------
|
2 |
+
# Copyright (c) 2024 Amar Ali-bey
|
3 |
+
#
|
4 |
+
# nanoCLIP: https://github.com/amaralibey/nanoCLIP
|
5 |
+
#
|
6 |
+
# Licensed under the MIT License. See LICENSE file in the project root.
|
7 |
+
# ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
|
|
9 |
|
10 |
+
from pathlib import Path
|
11 |
+
from typing import List, Tuple, Optional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
import torch
|
14 |
+
import torch.nn.functional as F
|
15 |
+
import faiss
|
16 |
+
from transformers import AutoTokenizer
|
17 |
+
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
from src.models import TextEncoder
|
20 |
+
from deployment.load_album import AlbumDataset
|
21 |
+
|
22 |
+
class ImageSearchEngine:
    """Text-to-image search over a pre-indexed photo gallery.

    On construction the engine loads a text encoder with its saved weights,
    a tokenizer, the list of gallery image paths and a FAISS index. A text
    query is then embedded and matched against that index by `search`.
    """

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        output_dim: int = 64,
        gallery_folder: str = "photos",
        device: str = 'cpu'
    ):
        """Load the model and the gallery.

        Args:
            model_name: Language model identifier, used for both the text
                encoder and the tokenizer.
            output_dim: Output dimension passed to the text encoder.
            gallery_folder: Name of the folder under ``gallery/`` (next to
                this file) that holds the images.
            device: Torch device string. ``'cuda'`` falls back to ``'cpu'``
                when CUDA is unavailable.

        Raises:
            FileNotFoundError: If the encoder weights, the gallery folder or
                the FAISS index file is missing.
        """
        if device == 'cuda' and not torch.cuda.is_available():
            print("CUDA is not available. Using CPU instead.")
            device = 'cpu'
        self.device = torch.device(device)
        self.setup_model(model_name, output_dim)
        self.setup_gallery(gallery_folder)

    def setup_model(self, model_name: str, output_dim: int) -> None:
        """Initialize the text encoder, load its weights and the tokenizer.

        Raises:
            FileNotFoundError: If ``txt_encoder_state_dict.pth`` is not found
                next to this file.
        """
        self.txt_encoder = TextEncoder(
            output_dim=output_dim,
            lang_model=model_name
        ).to(self.device)

        # The pre-trained weights are expected next to this file.
        weights_path = Path(__file__).parent.resolve() / 'txt_encoder_state_dict.pth'
        if not weights_path.exists():
            raise FileNotFoundError(f"Text encoder weights not found: {weights_path}, make sure to run the create_index.py script.")
        # weights_only=True restricts unpickling to tensors and plain containers.
        weights = torch.load(weights_path, map_location=self.device, weights_only=True)
        self.txt_encoder.load_state_dict(weights)
        self.txt_encoder.eval()

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def setup_gallery(self, gallery_folder: str) -> None:
        """Set up the image gallery and the FAISS index.

        Raises:
            FileNotFoundError: If the gallery folder or its ``.faiss`` index
                file does not exist.
        """
        gallery_path = Path(__file__).parent.resolve() / f'gallery/{gallery_folder}'
        if not gallery_path.exists():
            raise FileNotFoundError(f"Album folder {gallery_path} not found")
        # AlbumDataset is used only for its list of image paths (`imgs`); the
        # images themselves are not loaded here because Gradio loads them from
        # the paths returned by `search`.
        self.dataset = AlbumDataset(gallery_path, transform=None)

        # The index file sits beside the gallery folder and carries the same
        # name as the folder being indexed.
        index_path = gallery_path.parent / f"{gallery_folder}.faiss"
        # Fail with the same exception type as the other missing-file checks
        # instead of an opaque error from the FAISS reader.
        if not index_path.exists():
            raise FileNotFoundError(f"FAISS index {index_path} not found")
        self.index = faiss.read_index(index_path.as_posix())

    @torch.no_grad()
    def encode_query(self, query_text: str) -> torch.Tensor:
        """Encode the text query into an L2-normalized embedding on the CPU.

        Only the tokenizer's ``input_ids`` are forwarded to the encoder.
        The result is presumably of shape (1, output_dim) -- TODO confirm
        against TextEncoder.
        """
        inputs = self.tokenizer(query_text, truncation=True, return_tensors="pt")
        inputs = inputs['input_ids'].to(self.device)

        embedding = self.txt_encoder(inputs)
        embedding = F.normalize(embedding, p=2, dim=1)
        return embedding.cpu()

    def search(self, query_text: str, k: int = 10) -> List[Tuple[str, Optional[str]]]:
        """Search for images matching the query text.

        Args:
            query_text: Free-text query. Queries shorter than three
                non-whitespace-trimmed characters are ignored.
            k: Maximum number of results to return.

        Returns:
            A list of ``(image_path, None)`` tuples, at most ``k`` long and
            in the order returned by the index. The ``None`` slot is
            presumably the gallery caption -- confirm against the UI.
        """
        # Avoid searching for empty, whitespace-only or very short queries.
        if not query_text or len(query_text.strip()) < 3:
            return []

        k = int(k)
        if k <= 0:
            return []

        query_embedding = self.encode_query(query_text)
        dist, indices = self.index.search(query_embedding, k)
        # FAISS pads the result with -1 when the index holds fewer than k
        # vectors; a negative index would silently wrap around to the last
        # image, so those slots are dropped.
        # You can also filter results according to a threshold on `dist`.
        return [(self.dataset.imgs[idx], None) for idx in indices[0] if idx >= 0]
|
92 |
+
|
93 |
+
class GalleryUI:
    """Gradio front end for an `ImageSearchEngine`.

    Builds a single-page interface made of a header, a search box with a
    results gallery, and a footer, then wires the inputs to the engine's
    `search` method.
    """

    def __init__(self, search_engine: ImageSearchEngine):
        """Store the engine and locate ``style.css`` next to this file."""
        self.search_engine = search_engine
        self.css_path = Path(__file__).parent / 'style.css'

    def load_css(self) -> str:
        """Return the contents of the stylesheet as text.

        The file is read as UTF-8 explicitly so the result does not depend
        on the platform's default encoding.
        """
        return self.css_path.read_text(encoding="utf-8")

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface and return it (not yet launched)."""
        with gr.Blocks(css=self.load_css(), theme=gr.themes.Soft(text_size='lg')) as demo:
            with gr.Column(elem_classes="container"):
                self._create_header()
                self._create_search_section()
                self._create_footer()

            # Callbacks reference components created above, so they are
            # registered last, still inside the Blocks context.
            self._setup_callbacks(demo)
        return demo

    def _create_header(self) -> None:
        """Create the header section (title and tagline)."""
        with gr.Column(elem_classes="header"):
            gr.Markdown("# Gallery Search")
            gr.Markdown("Search through your collection of photos with AI")

    def _create_search_section(self) -> None:
        """Create the search section: query box, result count, gallery.

        The created components are stored on ``self`` (`query_text`,
        `number_of_results`, `gallery`) for `_setup_callbacks`.
        """
        with gr.Column():
            self.query_text = gr.Textbox(
                placeholder="Example: Riding my horse",
                label="Search Query",
                elem_classes="search-input",
                autofocus=True,
                container=False
            )

            with gr.Row(visible=False):  # this is hidden for now, but you can show it if you want
                self.number_of_results = gr.Dropdown(
                    choices=[4,8,12,16,24,30],
                    value=30,
                    label="Results per page",
                    elem_classes="dropdown"
                )

            self.gallery = gr.Gallery(
                label="Search Results",
                columns=3,
                object_fit="cover",
                elem_classes="gallery",
                container=False,
            )

    def _create_footer(self) -> None:
        """Create the footer section with author and repository links."""
        with gr.Column(elem_classes="footer"):
            gr.Markdown(
                """Created by [Amar Ali-bey](https://amaralibey.github.io) |
                [View on GitHub](https://github.com/amaralibey/nanoCLIP)"""
            )

    def _setup_callbacks(self, demo: gr.Blocks) -> None:
        """Set up the interface callbacks.

        Both the query box and the result-count dropdown re-run the search
        on change and write the result into the gallery. `demo` is accepted
        for the caller's convenience but is not used here.
        """
        # Search as the user types; the progress indicator is hidden for
        # this trigger.
        self.query_text.change(
            self.search_engine.search,
            inputs=[self.query_text, self.number_of_results],
            outputs=self.gallery,
            show_progress='hidden',
        )

        self.number_of_results.change(
            self.search_engine.search,
            inputs=[self.query_text, self.number_of_results],
            outputs=self.gallery
        )
|
169 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
if __name__ == "__main__":
    # Script entry point: build the search backend, wrap it in the Gradio
    # UI and start serving.
    engine_settings = {
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "output_dim": 64,
        "gallery_folder": "photos",
    }
    search_engine = ImageSearchEngine(**engine_settings)
    demo = GalleryUI(search_engine).create_interface()
    demo.launch()
|