Spaces:
Sleeping
Sleeping
Julian-Hans
committed on
Commit
·
9ef2a14
1
Parent(s):
4db0823
added attribution example, created local pipelines, implemented poc_app.py as a proof of concept
Browse files- app.py +0 -0
- attribution_example.py +8 -0
- blip_image_caption_large.py +13 -0
- config.py +3 -0
- musicgen_small.py +14 -0
- phi3_mini_4k_instruct.py +14 -0
- poc_app.py +53 -0
- requirements.txt +27 -0
app.py
ADDED
File without changes
|
attribution_example.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Example Code Attribution for AI-Generated Code
#
# AI-generated code is wrapped between an ATTRIBUTION-START and an
# ATTRIBUTION-END marker comment. Inside the markers, three header comments
# record the tool used (LLM), the prompt given to it (PROMPT), and any manual
# changes made afterwards (EDITS), followed by the generated code itself.

# ----ATTRIBUTION-START----
# LLM: Github Copilot
# PROMPT: write a hello world example
# EDITS: changed the wording to make it more personal
print("Hello, World! This is your Copilot speaking!")
# -----ATTRIBUTION-END-----
|
blip_image_caption_large.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# external imports
|
2 |
+
from transformers import pipeline
|
3 |
+
|
4 |
+
# local imports
|
5 |
+
import config
|
6 |
+
|
7 |
+
class Blip_Image_Caption_Large:
    """Wrapper around a locally run ``image-to-text`` pipeline.

    The checkpoint is read from ``config.IMAGE_CAPTION_MODEL`` and the
    pipeline is built once, when the wrapper is constructed.
    """

    def __init__(self):
        captioner = pipeline(
            "image-to-text",
            model=config.IMAGE_CAPTION_MODEL,
        )
        # Exposed as a public attribute; callers may reach into it directly.
        self.local_pipeline = captioner

    def caption_image_local_pipeline(self, image_path):
        """Caption the image at *image_path* with the local pipeline.

        Args:
            image_path: Image reference handed to the pipeline unchanged.

        Returns:
            The pipeline's raw result, unmodified.
        """
        return self.local_pipeline(image_path)
|
config.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Model identifiers used by the local pipeline wrappers. Each value is passed
# as the `model=` argument when the corresponding pipeline is built.

# Checkpoint for the "image-to-text" (captioning) pipeline.
IMAGE_CAPTION_MODEL = "Salesforce/blip-image-captioning-large"
# Checkpoint for the "text-generation" pipeline.
LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
# Checkpoint for the "text-to-audio" (music generation) pipeline.
MUSICGEN_MODEL = "facebook/musicgen-small"
|
musicgen_small.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# external imports
|
2 |
+
from transformers import pipeline
|
3 |
+
import scipy
|
4 |
+
|
5 |
+
# local imports
|
6 |
+
import config
|
7 |
+
|
8 |
+
class Musicgen_Small:
    """Wrapper around a locally run ``text-to-audio`` pipeline.

    The checkpoint is read from ``config.MUSICGEN_MODEL`` and the pipeline is
    built once, when the wrapper is constructed.
    """

    def __init__(self):
        self.local_pipeline = pipeline("text-to-audio", model=config.MUSICGEN_MODEL)

    def generate_music_local_pipeline(self, prompt, output_path="data/musicgen_out.wav"):
        """Generate audio for *prompt* and write it to a WAV file.

        Args:
            prompt: Text description handed to the pipeline.
            output_path: Destination of the WAV file. Defaults to the
                previously hard-coded ``data/musicgen_out.wav``.

        Returns:
            The path the WAV file was written to.
        """
        # Local imports keep this block self-contained: `os` is not imported at
        # module level, and importing `wavfile` explicitly avoids relying on
        # `import scipy` implicitly exposing the `scipy.io.wavfile` submodule.
        import os
        from scipy.io import wavfile

        # Sampling is enabled explicitly via the pipeline's forward parameters.
        music = self.local_pipeline(prompt, forward_params={"do_sample": True})

        # Create the target directory so a fresh checkout without `data/`
        # does not fail only after the (expensive) generation has finished.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        wavfile.write(output_path, rate=music["sampling_rate"], data=music["audio"])
        return output_path
|
phi3_mini_4k_instruct.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# external imports
|
2 |
+
from transformers import pipeline
|
3 |
+
|
4 |
+
# local imports
|
5 |
+
import config
|
6 |
+
|
7 |
+
|
8 |
+
class Phi3_Mini_4k_Instruct:
    """Wrapper around a locally run ``text-generation`` pipeline.

    The checkpoint is read from ``config.LLM_MODEL`` and the pipeline is built
    once, when the wrapper is constructed.
    """

    def __init__(self):
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model repository to be executed; confirm this is still required for
        # the pinned transformers version and that the checkpoint is trusted.
        generator = pipeline(
            "text-generation",
            model=config.LLM_MODEL,
            trust_remote_code=True,
        )
        # Exposed as a public attribute; callers may reach into it directly.
        self.local_pipeline = generator

    def generate_text_local_pipeline(self, messages):
        """Run the local pipeline on *messages*.

        Args:
            messages: Input handed to the pipeline unchanged.

        Returns:
            The pipeline's raw result, unmodified.
        """
        return self.local_pipeline(messages)
|
poc_app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# external imports
|
2 |
+
import time
|
3 |
+
|
4 |
+
# local imports
|
5 |
+
from blip_image_caption_large import Blip_Image_Caption_Large
|
6 |
+
from phi3_mini_4k_instruct import Phi3_Mini_4k_Instruct
|
7 |
+
from musicgen_small import Musicgen_Small
|
8 |
+
|
9 |
+
def main(image_path="data/test3.jpg"):
    """Run the proof-of-concept chain: image -> caption -> description -> music.

    The image is captioned, the caption is turned into a textual music
    description by the text-generation model, and that description is handed
    to the music-generation model. Intermediate results and per-stage
    durations are printed. Each stage's duration includes constructing its
    model wrapper, because the timer starts before the wrapper is created.

    Args:
        image_path: Image to caption. Defaults to the previously hard-coded
            ``data/test3.jpg``.
    """
    # perf_counter() is monotonic, so the measured durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.

    # test image captioning
    image_caption_start_time = time.perf_counter()
    image_caption_model = Blip_Image_Caption_Large()
    test_caption = image_caption_model.caption_image_local_pipeline(image_path)
    print(test_caption)
    image_caption_end_time = time.perf_counter()

    # test text generation
    text_generation_start_time = time.perf_counter()
    text_generation_model = Phi3_Mini_4k_Instruct()

    #TODO: move this to a config file
    text_generation_model.local_pipeline.model.config.max_length = 200

    #TODO: move system prompt somewhere else, allow for genre override
    messages = [
        {"role": "system", "content": "You are an image caption to song description converter with a deep understanding of Music and Art. You are given the caption of an image. Your task is to generate a textual description of a musical piece that fits the caption. The description should be detailed and vivid, and should include the genre, mood, instruments, tempo, and other relevant information about the music. You should also use your knowledge of art and visual aesthetics to create a musical piece that complements the image. Only output the description of the music, without any explanation or introduction. Be concise."},
        # The first caption returned for the image becomes the user message.
        {"role": "user", "content": test_caption[0]["generated_text"]},
    ]
    test_text = text_generation_model.generate_text_local_pipeline(messages)
    print(test_text)
    text_generation_end_time = time.perf_counter()

    # test audio generation
    music_generation_start_time = time.perf_counter()
    music_generation_model = Musicgen_Small()
    # The content of the last message in the last generated conversation is
    # used as the music prompt.
    music_prompt = str(test_text[-1]['generated_text'][-1]['content'])
    music_generation_model.generate_music_local_pipeline(music_prompt)
    music_generation_end_time = time.perf_counter()

    # calculate durations
    image_caption_duration = image_caption_end_time - image_caption_start_time
    text_generation_duration = text_generation_end_time - text_generation_start_time
    music_generation_duration = music_generation_end_time - music_generation_start_time
    total_duration = music_generation_end_time - image_caption_start_time

    # output durations
    print(f"Image Captioning Duration: {image_caption_duration}")
    print(f"Text Generation Duration: {text_generation_duration}")
    print(f"Music Generation Duration: {music_generation_duration}")
    print(f"Total Duration: {total_duration}")


# Only run the pipeline when executed as a script, so importing this module
# does not load three models as a side effect.
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
certifi==2024.8.30
|
2 |
+
charset-normalizer==3.3.2
|
3 |
+
filelock==3.16.0
|
4 |
+
fsspec==2024.9.0
|
5 |
+
huggingface-hub==0.24.6
|
6 |
+
idna==3.8
|
7 |
+
Jinja2==3.1.4
|
8 |
+
MarkupSafe==2.1.5
|
9 |
+
mpmath==1.3.0
|
10 |
+
networkx==3.3
|
11 |
+
numpy==2.1.1
|
12 |
+
packaging==24.1
|
13 |
+
pillow==10.4.0
|
14 |
+
PyYAML==6.0.2
|
15 |
+
regex==2024.9.11
|
16 |
+
requests==2.32.3
|
17 |
+
safetensors==0.4.5
|
18 |
+
scipy==1.14.1
|
19 |
+
sympy==1.13.2
|
20 |
+
tokenizers==0.19.1
|
21 |
+
torch==2.4.1
|
22 |
+
torchaudio==2.4.1
|
23 |
+
torchvision==0.19.1
|
24 |
+
tqdm==4.66.5
|
25 |
+
transformers==4.44.2
|
26 |
+
typing_extensions==4.12.2
|
27 |
+
urllib3==2.2.2
|