kimyoungjune
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -22,12 +22,14 @@ library_name: transformers
|
|
22 |
|
23 |
- **Developed by:** NC Research, Multimodal Generation Team
|
24 |
- **Technical Report:** [Coming Soon]()
|
|
|
25 |
- **Languages:** Korean, English
|
26 |
- **License:** CC BY-NC 4.0
|
27 |
- **Architecture:** VARCO-VISION-14B follows the architecture of [LLaVA-OneVision](https://arxiv.org/abs/2408.03326).
|
28 |
- **Base Model:**
|
29 |
- **Language Model:** [Qwen/Qwen2.5-14B-Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct)
|
30 |
- **Vision Encoder:** [google/siglip-so400m-patch14-384](https://huggingface.co/google/siglip-so400m-patch14-384)
|
|
|
31 |
|
32 |
|
33 |
|
@@ -49,6 +51,7 @@ After installing **LLaVA-NeXT**, you can load VARCO-VISION-14B using the followi
|
|
49 |
import torch
|
50 |
from transformers import AutoTokenizer
|
51 |
from llava.model.language_model.llava_qwen import LlavaQwenForCausalLM
|
|
|
52 |
from llava.mm_utils import tokenizer_image_token, process_images
|
53 |
|
54 |
model_name = "NCSOFT/VARCO-VISION-14B"
|
@@ -179,6 +182,7 @@ To perform Optical Character Recognition (OCR), use the `<ocr>` token.
|
|
179 |
|
180 |
```python
|
181 |
image_file = "./assets/ocr_1.png"
|
|
|
182 |
|
183 |
conversation = [
|
184 |
{
|
|
|
22 |
|
23 |
- **Developed by:** NC Research, Multimodal Generation Team
|
24 |
- **Technical Report:** [Coming Soon]()
|
25 |
+
- **Demo Page:** [Coming Soon]()
|
26 |
- **Languages:** Korean, English
|
27 |
- **License:** CC BY-NC 4.0
|
28 |
- **Architecture:** VARCO-VISION-14B follows the architecture of [LLaVA-OneVision](https://arxiv.org/abs/2408.03326).
|
29 |
- **Base Model:**
|
30 |
- **Language Model:** [Qwen/Qwen2.5-14B-Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct)
|
31 |
- **Vision Encoder:** [google/siglip-so400m-patch14-384](https://huggingface.co/google/siglip-so400m-patch14-384)
|
32 |
+
- **Hugging Face Version Model:** [NCSOFT/VARCO-VISION-14B-HF](https://huggingface.co/NCSOFT/VARCO-VISION-14B-HF)
|
33 |
|
34 |
|
35 |
|
|
|
51 |
import torch
|
52 |
from transformers import AutoTokenizer
|
53 |
from llava.model.language_model.llava_qwen import LlavaQwenForCausalLM
|
54 |
+
from llava.conversation import apply_chat_template
|
55 |
from llava.mm_utils import tokenizer_image_token, process_images
|
56 |
|
57 |
model_name = "NCSOFT/VARCO-VISION-14B"
|
|
|
182 |
|
183 |
```python
|
184 |
image_file = "./assets/ocr_1.png"
|
185 |
+
raw_image = Image.open(image_file)
|
186 |
|
187 |
conversation = [
|
188 |
{
|