geekyrakshit committed on
Commit
4069faf
·
1 Parent(s): 86ac070

add: basic workflow to check code format and lint

Browse files
.github/workflows/tests.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+ on:
3
+ pull_request:
4
+ paths:
5
+ - .github/workflows/tests.yml
6
+ - medrag_multi_modal/**
7
+ - pyproject.toml
8
+
9
+ jobs:
10
+ code-format:
11
+ name: check code format using black
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v3
15
+ - uses: psf/black@stable
16
+ lint:
17
+ name: Check linting using ruff
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: chartboost/ruff-action@v1
medrag_multi_modal/document_loader/load_image.py CHANGED
@@ -3,11 +3,11 @@ import os
3
  from typing import Optional
4
 
5
  import rich
6
- import wandb
7
  import weave
8
  from pdf2image.pdf2image import convert_from_path
9
  from PIL import Image
10
 
 
11
  from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
12
 
13
 
 
3
  from typing import Optional
4
 
5
  import rich
 
6
  import weave
7
  from pdf2image.pdf2image import convert_from_path
8
  from PIL import Image
9
 
10
+ import wandb
11
  from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
12
 
13
 
test.ipynb ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import rich\n",
10
+ "import weave\n",
11
+ "from dotenv import load_dotenv\n",
12
+ "\n",
13
+ "from medrag_multi_modal.retrieval import BM25sRetriever"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "name": "stdout",
23
+ "output_type": "stream",
24
+ "text": [
25
+ "Logged in as Weights & Biases user: geekyrakshit.\n",
26
+ "View Weave data at https://wandb.ai/ml-colabs/medrag-multi-modal/weave\n"
27
+ ]
28
+ },
29
+ {
30
+ "data": {
31
+ "text/plain": [
32
+ "<weave.trace.weave_client.WeaveClient at 0x31bb4b200>"
33
+ ]
34
+ },
35
+ "execution_count": 2,
36
+ "metadata": {},
37
+ "output_type": "execute_result"
38
+ }
39
+ ],
40
+ "source": [
41
+ "load_dotenv()\n",
42
+ "weave.init(project_name=\"ml-colabs/medrag-multi-modal\")"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 3,
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "name": "stderr",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n",
55
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 7 of 7 files downloaded. \n"
56
+ ]
57
+ }
58
+ ],
59
+ "source": [
60
+ "retriever = BM25sRetriever.from_wandb_artifact(\n",
61
+ " index_artifact_address=\"ml-colabs/medrag-multi-modal/grays-anatomy-bm25s:v2\"\n",
62
+ ")"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 4,
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "data": {
72
+ "application/vnd.jupyter.widget-view+json": {
73
+ "model_id": "6247f921c889469283505348967807da",
74
+ "version_major": 2,
75
+ "version_minor": 0
76
+ },
77
+ "text/plain": [
78
+ "Split strings: 0%| | 0/1 [00:00<?, ?it/s]"
79
+ ]
80
+ },
81
+ "metadata": {},
82
+ "output_type": "display_data"
83
+ },
84
+ {
85
+ "data": {
86
+ "application/vnd.jupyter.widget-view+json": {
87
+ "model_id": "0ccb25cf58c84023846d68561962adc5",
88
+ "version_major": 2,
89
+ "version_minor": 0
90
+ },
91
+ "text/plain": [
92
+ "Stem Tokens: 0%| | 0/1 [00:00<?, ?it/s]"
93
+ ]
94
+ },
95
+ "metadata": {},
96
+ "output_type": "display_data"
97
+ },
98
+ {
99
+ "data": {
100
+ "application/vnd.jupyter.widget-view+json": {
101
+ "model_id": "d2eddb186fac447d8e7dc8f185ce7c86",
102
+ "version_major": 2,
103
+ "version_minor": 0
104
+ },
105
+ "text/plain": [
106
+ "BM25S Retrieve: 0%| | 0/1 [00:00<?, ?it/s]"
107
+ ]
108
+ },
109
+ "metadata": {},
110
+ "output_type": "display_data"
111
+ }
112
+ ],
113
+ "source": [
114
+ "results = retriever.retrieve(query=\"What are Ribosomes?\")"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 15,
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "data": {
124
+ "text/html": [
125
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">class</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #008000; text-decoration-color: #008000\">'dict'</span><span style=\"font-weight: bold\">&gt;</span>\n",
126
+ "</pre>\n"
127
+ ],
128
+ "text/plain": [
129
+ "\u001b[1m<\u001b[0m\u001b[1;95mclass\u001b[0m\u001b[39m \u001b[0m\u001b[32m'dict'\u001b[0m\u001b[1m>\u001b[0m\n"
130
+ ]
131
+ },
132
+ "metadata": {},
133
+ "output_type": "display_data"
134
+ }
135
+ ],
136
+ "source": [
137
+ "rich.print(list(list(results['results'])[0])[0])"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 21,
143
+ "metadata": {},
144
+ "outputs": [
145
+ {
146
+ "data": {
147
+ "text/plain": [
148
+ "2"
149
+ ]
150
+ },
151
+ "execution_count": 21,
152
+ "metadata": {},
153
+ "output_type": "execute_result"
154
+ }
155
+ ],
156
+ "source": [
157
+ "len()"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 19,
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "data": {
167
+ "text/plain": [
168
+ "[0.4504720866680145, 0.3982057571411133]"
169
+ ]
170
+ },
171
+ "execution_count": 19,
172
+ "metadata": {},
173
+ "output_type": "execute_result"
174
+ }
175
+ ],
176
+ "source": [
177
+ "results[\"scores\"].flatten().tolist()"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": []
186
+ }
187
+ ],
188
+ "metadata": {
189
+ "kernelspec": {
190
+ "display_name": ".venv",
191
+ "language": "python",
192
+ "name": "python3"
193
+ },
194
+ "language_info": {
195
+ "codemirror_mode": {
196
+ "name": "ipython",
197
+ "version": 3
198
+ },
199
+ "file_extension": ".py",
200
+ "mimetype": "text/x-python",
201
+ "name": "python",
202
+ "nbconvert_exporter": "python",
203
+ "pygments_lexer": "ipython3",
204
+ "version": "3.12.7"
205
+ }
206
+ },
207
+ "nbformat": 4,
208
+ "nbformat_minor": 2
209
+ }