links
Browse files- README.md +13 -1
- api.py +2 -2
- audiocraft/genmodel.py +1 -35
- uc_spk_Landscape2Soundscape_Masterpieces_pics/04_Friedrich_FV317_001.txt +1 -1
- uc_spk_Landscape2Soundscape_Masterpieces_pics/06_Menzel_AI900_001.txt +1 -1
- uc_spk_Landscape2Soundscape_Masterpieces_pics/08_Monet_AI1013_001.txt +5 -4
- uc_spk_Landscape2Soundscape_Masterpieces_pics/12_Slevogt_AII1022_001.txt +3 -3
- uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____06_Menzel_AI900_001.jpg +0 -0
- uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____08_Monet_AI1013_001.jpg +0 -0
- uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____11_Liebermann_NG4-94_001.jpg +0 -0
- uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____12_Slevogt_AII1022_001.jpg +0 -0
README.md
CHANGED
@@ -101,10 +101,22 @@ For SHIFT demo / Collaboration with [SMB](https://www.smb.museum/home/)
|
|
101 |
|
102 |
[![03](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____03_Schinkel_WS200-002.jpg)](https://youtu.be/BhMh02knkco)
|
103 |
|
|
|
|
|
104 |
[![05](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____05_Blechen_FV40_001.jpg)](https://youtu.be/a3qk9S87v60)
|
105 |
|
|
|
|
|
|
|
|
|
106 |
[![10](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____10_Boecklin_967648_NG2-80_001_rsz.jpg)](https://www.youtube.com/watch?v=Y8QyYUgLaCg)
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
# Live Demo - Paplay
|
110 |
|
@@ -114,7 +126,7 @@ Flask
|
|
114 |
CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=/data/dkounadis/.hf7/ CUDA_VISIBLE_DEVICES=4 python live_api.py
|
115 |
```
|
116 |
|
117 |
-
Client
|
118 |
|
119 |
```python
|
120 |
python live_demo.py # will ask text input & play soundscape
|
|
|
101 |
|
102 |
[![03](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____03_Schinkel_WS200-002.jpg)](https://youtu.be/BhMh02knkco)
|
103 |
|
104 |
+
|
105 |
+
|
106 |
[![05](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____05_Blechen_FV40_001.jpg)](https://youtu.be/a3qk9S87v60)
|
107 |
|
108 |
+
[![06](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____06_Menzel_AI900_001.jpg)](https://youtu.be/3M0y9OYzDfU)
|
109 |
+
|
110 |
+
[![08](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____08_Monet_AI1013_001.jpg)](https://youtu.be/gnGCYLcdLsA)
|
111 |
+
|
112 |
[![10](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____10_Boecklin_967648_NG2-80_001_rsz.jpg)](https://www.youtube.com/watch?v=Y8QyYUgLaCg)
|
113 |
|
114 |
+
[![11](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____11_Liebermann_NG4-94_001.jpg)](https://youtu.be/XDDzxDSrhb0)
|
115 |
+
|
116 |
+
[![12](uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____12_Slevogt_AII1022_001.jpg)](https://youtu.be/I3YYKiUzHpA)
|
117 |
+
|
118 |
+
|
119 |
+
|
120 |
|
121 |
# Live Demo - Paplay
|
122 |
|
|
|
126 |
CUDA_DEVICE_ORDER=PCI_BUS_ID HF_HOME=/data/dkounadis/.hf7/ CUDA_VISIBLE_DEVICES=4 python live_api.py
|
127 |
```
|
128 |
|
129 |
+
Client - Describe any sound with words and it will be played back to you.
|
130 |
|
131 |
```python
|
132 |
python live_demo.py # will ask text input & play soundscape
|
api.py
CHANGED
@@ -49,7 +49,7 @@ def overlay(x, scene=None):
|
|
49 |
print('Generating AudioCraft')
|
50 |
back = [sound_generator.generate(
|
51 |
[scene]
|
52 |
-
)[0].detach().cpu().numpy()[0, :] for _ in range(
|
53 |
|
54 |
print([j.shape for j in back], len(back), 'BACK')
|
55 |
|
@@ -61,7 +61,7 @@ def overlay(x, scene=None):
|
|
61 |
)[0, :] for i in back]
|
62 |
print('Cloning backgrounds')
|
63 |
# clone/elongate by 4x
|
64 |
-
back = [(_shift(np.concatenate([single_gen] * 4))) for single_gen in back]
|
65 |
|
66 |
|
67 |
# long ~30s
|
|
|
49 |
print('Generating AudioCraft')
|
50 |
back = [sound_generator.generate(
|
51 |
[scene]
|
52 |
+
)[0].detach().cpu().numpy()[0, :] for _ in range(1)]
|
53 |
|
54 |
print([j.shape for j in back], len(back), 'BACK')
|
55 |
|
|
|
61 |
)[0, :] for i in back]
|
62 |
print('Cloning backgrounds')
|
63 |
# clone/elongate by 4x
|
64 |
+
back = [(_shift(np.concatenate([_shift(single_gen)] * 4))) for single_gen in back]
|
65 |
|
66 |
|
67 |
# long ~30s
|
audiocraft/genmodel.py
CHANGED
@@ -135,14 +135,7 @@ class BaseGenModel(ABC):
|
|
135 |
return self.generate_audio(tokens), tokens
|
136 |
return self.generate_audio(tokens)
|
137 |
|
138 |
-
def generate(self, descriptions
|
139 |
-
-> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
|
140 |
-
"""Generate samples conditioned on text.
|
141 |
-
|
142 |
-
Args:
|
143 |
-
descriptions (list of str): A list of strings used as text conditioning.
|
144 |
-
progress (bool, optional): Flag to display progress of the generation process. Defaults to False.
|
145 |
-
"""
|
146 |
attributes, prompt_tokens = self._prepare_tokens_and_attributes(descriptions, None)
|
147 |
assert prompt_tokens is None
|
148 |
tokens = self._generate_tokens(attributes, prompt_tokens, progress)
|
@@ -150,33 +143,6 @@ class BaseGenModel(ABC):
|
|
150 |
return self.generate_audio(tokens), tokens
|
151 |
return self.generate_audio(tokens)
|
152 |
|
153 |
-
def generate_continuation(self, prompt: torch.Tensor, prompt_sample_rate: int,
|
154 |
-
descriptions: tp.Optional[tp.List[tp.Optional[str]]] = None,
|
155 |
-
progress: bool = False, return_tokens: bool = False) \
|
156 |
-
-> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
|
157 |
-
"""Generate samples conditioned on audio prompts and an optional text description.
|
158 |
-
|
159 |
-
Args:
|
160 |
-
prompt (torch.Tensor): A batch of waveforms used for continuation.
|
161 |
-
Prompt should be [B, C, T], or [C, T] if only one sample is generated.
|
162 |
-
prompt_sample_rate (int): Sampling rate of the given audio waveforms.
|
163 |
-
descriptions (list of str, optional): A list of strings used as text conditioning. Defaults to None.
|
164 |
-
progress (bool, optional): Flag to display progress of the generation process. Defaults to False.
|
165 |
-
"""
|
166 |
-
if prompt.dim() == 2:
|
167 |
-
prompt = prompt[None]
|
168 |
-
if prompt.dim() != 3:
|
169 |
-
raise ValueError("prompt should have 3 dimensions: [B, C, T] (C = 1).")
|
170 |
-
prompt = convert_audio(prompt, prompt_sample_rate, self.sample_rate, self.audio_channels)
|
171 |
-
if descriptions is None:
|
172 |
-
descriptions = [None] * len(prompt)
|
173 |
-
attributes, prompt_tokens = self._prepare_tokens_and_attributes(descriptions, prompt)
|
174 |
-
assert prompt_tokens is not None
|
175 |
-
tokens = self._generate_tokens(attributes, prompt_tokens, progress)
|
176 |
-
if return_tokens:
|
177 |
-
return self.generate_audio(tokens), tokens
|
178 |
-
return self.generate_audio(tokens)
|
179 |
-
|
180 |
def _generate_tokens(self, attributes: tp.List[ConditioningAttributes],
|
181 |
prompt_tokens: tp.Optional[torch.Tensor], progress: bool = False) -> torch.Tensor:
|
182 |
"""Generate discrete audio tokens given audio prompt and/or conditions.
|
|
|
135 |
return self.generate_audio(tokens), tokens
|
136 |
return self.generate_audio(tokens)
|
137 |
|
138 |
+
def generate(self, descriptions, progress = False, return_tokens= False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
attributes, prompt_tokens = self._prepare_tokens_and_attributes(descriptions, None)
|
140 |
assert prompt_tokens is None
|
141 |
tokens = self._generate_tokens(attributes, prompt_tokens, progress)
|
|
|
143 |
return self.generate_audio(tokens), tokens
|
144 |
return self.generate_audio(tokens)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
def _generate_tokens(self, attributes: tp.List[ConditioningAttributes],
|
147 |
prompt_tokens: tp.Optional[torch.Tensor], progress: bool = False) -> torch.Tensor:
|
148 |
"""Generate discrete audio tokens given audio prompt and/or conditions.
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/04_Friedrich_FV317_001.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
In
|
2 |
|
3 |
Caspar David Friedrich had never actually visited the Alps, yet he created several paintings of these mountains. For his iconic work The Watzmann, he used a variety of sources. A key reference was a watercolor study of the mountain’s summit captured by his pupil Johann August Heinrich, now in the National Museum of Oslo. Friedrich also drew his own sketches from travels in the Harz Mountain range. The rocky formation in the foreground was based on his drawings of the Trudenstein at Hohnekopf, near Brocken summit in June 28 1811.
|
4 |
|
|
|
1 |
+
In 18 24, the Literary Conversation newspaper wrote about the Dresden Academy Exhibition, mentioning a beautiful large landscape by Professor Friedrich: a solitary mountain region... splendidly depicted with varying tones across the upper mountain ranges. In the foreground, grasses and small trees cling to the towering basalt rocks. Higher up, mist curls around the desolate ridges, and at the top, the shimmering rock faces, crowned with eternal snow, glow in untarnished clarity. The solitude here is eerie; one longs to see at least an eagle or chamois—yet in vain, no life dwells here except for the air and light. Every pulse of emotion seems to falter at this height.
|
2 |
|
3 |
Caspar David Friedrich had never actually visited the Alps, yet he created several paintings of these mountains. For his iconic work The Watzmann, he used a variety of sources. A key reference was a watercolor study of the mountain’s summit captured by his pupil Johann August Heinrich, now in the National Museum of Oslo. Friedrich also drew his own sketches from travels in the Harz Mountain range. The rocky formation in the foreground was based on his drawings of the Trudenstein at Hohnekopf, near Brocken summit in June 28 1811.
|
4 |
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/06_Menzel_AI900_001.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
Throughout his life, Menzel was drawn to the newly developed areas on the outskirts of the city, and his daily routes often took him past construction sites and wastelands. While the representative buildings of Berlin, redesigned by Schinkel, only casually captured his interest, many of his sketches and paintings from the
|
|
|
1 |
+
Throughout his life, Menzel was drawn to the newly developed areas on the outskirts of the city, and his daily routes often took him past construction sites and wastelands. While the representative buildings of Berlin, redesigned by Schinkel, only casually captured his interest, many of his sketches and paintings from the 18 40s feature sandy paths, construction sites, and inconspicuous temporary structures. He frequently depicted the banks of the Schafgraben, now known as the Landwehrkanal, near his home, dedicating a print to its surreal stillness. A number of particularly dynamic pencil drawings illustrate the ruined trunks and wild branches of willows lining the water, which were also admired by outsiders. The painting Construction Site with Willows is characterized by a silvery blue-green that powerfully pushes into the foreground, compressing the spatial elements to the right and left. Starting with a milky tone and transitioning into a brown shadow zone sprinkled with light spots, it creates a clear complementary contrast with the bright red of the construction site in the background. Here, along with the yellow of the building behind, the effect of strong sunlight accumulates under the dull blue sky. Various materialities—intricate foliage and soft grass, shimmering water, and dirty sand—demand different brush techniques, which can be delicate, summarily executed, or rough. Unexpectedly, intricate human activity becomes visible in the background, further enriching the scene and highlighting Menzel's engagement with the interplay between nature and urban life.
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/08_Monet_AI1013_001.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
-
In
|
2 |
|
3 |
-
In the spring of 1874, Monet and his fellow artists showcased their works
|
|
|
4 |
|
5 |
-
Among the paintings Monet
|
6 |
|
7 |
-
Paul Cézanne later remarked to Ambroise Vollard that Monet
|
|
|
1 |
+
In 18 66 while painting the view of the Church of Saint Germain l’Auxerrois, Claude Monet found himself wavering between a dark-toned architectural depiction and the dissolution of figures in light. This work marks an early piece of Impressionism. Eight years later, his vibrant painting titled "Sommertag" from 1874 can be regarded as an archetypal painting of the Impressionist movement.
|
2 |
|
3 |
+
In the spring of 1874, Monet and his fellow artists showcased their works at the studio of the photographer Nadar on Boulevard des Capoucines in Paris. A critique by Louis Leroy of Charivari on April 25, 1874 referenced Monet's painting as Impression of Sunrise from 1872 (Musée Marmottan Monet, Paris),
|
4 |
+
coining the term Impressionism.
|
5 |
|
6 |
+
Among the paintings Monet painted in the summer of 1874 is this sun-drenched landscape featuring a wide meadow with hazy blue mountains in the distance. Monet captured the grass, trees, mountains, and figures with short, colorful, and delicately applied brushstrokes, convincingly conveying bright light and lethe. The figures—Monet's wife Camille, his son Jean, and a third person—are highlighted by their bright clothing but are not portrayed more distinctly than the wind-blown trees or the colorful shadows on the yellow grass. As Leroy rightly observed, Monet's focus was on pure impression.
|
7 |
|
8 |
+
Paul Cézanne later remarked to Ambroise Vollard that Monet had an eye, but what an eye! Around the same time, Hans Thoma in Munich, influenced by Böcklin, also created a painting titled "Summer" (1872, National Gallery, Inv. No. A II 510). Thoma's vivid symbolism was met with both praise and criticism from contemporary critics, depending on their aesthetic stance. However, upon closer examination, these two summer depictions reveal surprising similarities: in both cases, the downplaying of figures avoids any genre-specific elements, rendering the fleeting moment timeless.
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/12_Slevogt_AII1022_001.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
Around 1900
|
2 |
|
3 |
-
Lichtwark entrusted commissions to painters he highly
|
4 |
|
5 |
-
|
|
|
1 |
+
Around 1900 the Hamburger Kunsthalle, under the direction of Alfred Lichtwark, was particularly active in art education. A key aspect of his vision was the development of a collection showcasing works from the Hanseatic city. Artists were commissioned by Lichtwark to portray prominent Hamburg citizens and city views.
|
2 |
|
3 |
+
Lichtwark entrusted commissions to painters he highly admired; this is how Slevogt came to create a portrait of Senator William Henry O’Swald for the Kunsthalle in 1905.
|
4 |
|
5 |
+
He also produced several landscapes of the Alster river, including this one. While Max Liebermann, in his painting Sommerabend an der Alster, focused on elegant ladies in their boats, Slevogt paid little attention to human figures. Instead, he was captivated by the atmospheric qualities of the skies and waters, the shading colors and light of the evening.
|
uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____06_Menzel_AI900_001.jpg
ADDED
uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____08_Monet_AI1013_001.jpg
ADDED
uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____11_Liebermann_NG4-94_001.jpg
ADDED
uc_spk_Landscape2Soundscape_Masterpieces_pics/thumb____12_Slevogt_AII1022_001.jpg
ADDED