auerchristoph committed on
Commit
576a105
·
verified ·
1 Parent(s): 1f62407

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +79 -4
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  base_model:
3
- - HuggingFaceTB/SmolVLM-256M-Instruct
4
  language:
5
  - en
6
  library_name: transformers
@@ -16,9 +16,84 @@ Refer to the [original model card](https://huggingface.co/ds4sd/SmolDocling-256M
16
  ## Use with mlx
17
 
18
  ```bash
19
- pip install -U mlx-vlm
20
  ```
21
 
22
- ```bash
23
- python -m mlx_vlm.generate --model ds4sd/SmolDocling-256M-preview-mlx-bf16 --max-tokens 100 --temperature 0.0 --prompt "Describe this image." --image <path_to_image>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ```
 
1
  ---
2
  base_model:
3
+ - ds4sd/SmolDocling-256M-preview
4
  language:
5
  - en
6
  library_name: transformers
 
16
  ## Use with mlx
17
 
18
  ```bash
19
+ pip install -U mlx-vlm pillow docling-core requests
20
  ```
21
 
22
+ ```python
23
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "docling-core",
#     "mlx-vlm",
#     "pillow",
#     "requests",
# ]
# ///
# Example: convert one page image to DocTags with SmolDocling on MLX, then
# export the resulting document as Markdown and, optionally, HTML.
from io import BytesIO
from pathlib import Path
from urllib.parse import urlparse

import requests
from PIL import Image
from docling_core.types.doc import ImageRefMode
from docling_core.types.doc.document import DocTagsDocument, DoclingDocument
from mlx_vlm import load, generate
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config, stream_generate

## Settings
SHOW_IN_BROWSER = True  # Export output as HTML and open in webbrowser.

## Load the model
model_path = "ds4sd/SmolDocling-256M-preview-mlx-bf16"
model, processor = load(model_path)
config = load_config(model_path)

## Prepare input
prompt = "Convert this page to docling."

# image = "https://ibm.biz/docling-page-with-list"
image = "https://ibm.biz/docling-page-with-table"

# Load image resource: anything with a URL scheme is downloaded, everything
# else is treated as a local file path.
if urlparse(image).scheme != "":  # it is a URL
    # The whole body is read right away, so there is no need for stream=True.
    response = requests.get(image, timeout=10)
    response.raise_for_status()
    pil_image = Image.open(BytesIO(response.content))
else:
    pil_image = Image.open(image)

# Apply chat template
formatted_prompt = apply_chat_template(processor, config, prompt, num_images=1)

## Generate output
print("DocTags: \n\n")

output = ""
for token in stream_generate(
    model, processor, formatted_prompt, [image], max_tokens=4096, verbose=False
):
    output += token.text
    print(token.text, end="")
    # Check the accumulated text, not just the latest chunk: the closing tag
    # may arrive split across several streamed chunks.
    if "</doctag>" in output:
        break

print("\n\n")

# Populate document
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([output], [pil_image])
# create a docling document
doc = DoclingDocument(name="SampleDocument")
doc.load_from_doctags(doctags_doc)

## Export as any format
# Markdown
print("Markdown: \n\n")
print(doc.export_to_markdown())

# HTML
if SHOW_IN_BROWSER:
    import webbrowser

    out_path = Path("./output.html")
    doc.save_as_html(out_path, image_mode=ImageRefMode.EMBEDDED)
    # as_uri() builds a well-formed, percent-encoded file:// URL; prefixing
    # "file:///" to an absolute POSIX path would yield four slashes.
    webbrowser.open(out_path.resolve().as_uri())
99
  ```