Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
|
|
21 |
print("------------------------- 4 -------------------------\n")
|
22 |
|
23 |
|
24 |
-
def predict(image,max_length=64, num_beams=4):
|
25 |
image = image.convert('RGB')
|
26 |
image = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
|
27 |
clean_text = lambda x: x.replace('<|endoftext|>','').split('\n')[0]
|
@@ -49,24 +49,28 @@ title = "Image to Text ViT with LORA"
|
|
49 |
# interface.launch(debug=True)
|
50 |
|
51 |
with gr.Blocks() as demo:
|
|
|
52 |
|
53 |
gr.HTML(
|
54 |
"""
|
55 |
<div style="text-align: center; max-width: 1200px; margin: 20px auto;">
|
56 |
<h1 style="font-weight: 900; font-size: 3rem; margin: 0rem">
|
57 |
-
ViT Image-to-Text
|
58 |
</h1>
|
59 |
<h2 style="text-align: left; font-weight: 450; font-size: 1rem; margin-top: 2rem; margin-bottom: 1.5rem">
|
60 |
In the field of large language models, the challenge of fine-tuning has long perplexed researchers. Microsoft, however, has unveiled an innovative solution called <b>Low-Rank Adaptation (LoRA)</b>. With the emergence of behemoth models like GPT-3 boasting billions of parameters, the cost of fine-tuning them for specific tasks or domains has become exorbitant.
|
61 |
LoRA offers a groundbreaking approach by freezing the weights of pre-trained models and introducing trainable layers known as <b>rank-decomposition matrices in each transformer block</b>. This ingenious technique significantly reduces the number of trainable parameters and minimizes GPU memory requirements, as gradients no longer need to be computed for the majority of model weights.
|
62 |
<br>
|
63 |
<br>
|
64 |
-
You can find more info here: <a href="https://www.linkedin.com/pulse/fine-tuning-image-to-text-algorithms-with-lora-daniel-puente-viejo" target="_blank"
|
65 |
</h2>
|
66 |
|
67 |
</div>
|
68 |
""")
|
69 |
-
gr.
|
70 |
-
|
|
|
|
|
|
|
71 |
|
72 |
demo.launch(debug=True)
|
|
|
21 |
print("------------------------- 4 -------------------------\n")
|
22 |
|
23 |
|
24 |
+
def predict(image, max_length=64, num_beams=4):
|
25 |
image = image.convert('RGB')
|
26 |
image = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
|
27 |
clean_text = lambda x: x.replace('<|endoftext|>','').split('\n')[0]
|
|
|
49 |
# interface.launch(debug=True)
|
50 |
|
51 |
with gr.Blocks() as demo:
|
52 |
+
|
53 |
|
54 |
gr.HTML(
|
55 |
"""
|
56 |
<div style="text-align: center; max-width: 1200px; margin: 20px auto;">
|
57 |
<h1 style="font-weight: 900; font-size: 3rem; margin: 0rem">
|
58 |
+
ViT Image-to-Text with LoRA
|
59 |
</h1>
|
60 |
<h2 style="text-align: left; font-weight: 450; font-size: 1rem; margin-top: 2rem; margin-bottom: 1.5rem">
|
61 |
In the field of large language models, the challenge of fine-tuning has long perplexed researchers. Microsoft, however, has unveiled an innovative solution called <b>Low-Rank Adaptation (LoRA)</b>. With the emergence of behemoth models like GPT-3 boasting billions of parameters, the cost of fine-tuning them for specific tasks or domains has become exorbitant.
|
62 |
LoRA offers a groundbreaking approach by freezing the weights of pre-trained models and introducing trainable layers known as <b>rank-decomposition matrices in each transformer block</b>. This ingenious technique significantly reduces the number of trainable parameters and minimizes GPU memory requirements, as gradients no longer need to be computed for the majority of model weights.
|
63 |
<br>
|
64 |
<br>
|
65 |
+
You can find more info here: <a href="https://www.linkedin.com/pulse/fine-tuning-image-to-text-algorithms-with-lora-daniel-puente-viejo" target="_blank" style="text-decoration: underline;">LinkedIn article</a>
|
66 |
</h2>
|
67 |
|
68 |
</div>
|
69 |
""")
|
70 |
+
with gr.Row():
|
71 |
+
with gr.Column(scale=1):
|
72 |
+
gr.inputs.Image(label="Upload any Image", type = 'pil', optional=True)
|
73 |
+
with gr.Column(scale=1):
|
74 |
+
gr.outputs.Textbox(type="text",label="Captions")
|
75 |
|
76 |
demo.launch(debug=True)
|