File size: 41,571 Bytes
9754890
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import unicodedata\n",
    "\n",
    "def normalize_text(text):\n",
    "    \"\"\"Return text NFKC-normalized with whitespace runs collapsed to one space.\n",
    "\n",
    "    None is mapped to the empty string; leading and trailing whitespace\n",
    "    is dropped.\n",
    "    \"\"\"\n",
    "    if text is None:\n",
    "        return \"\"\n",
    "    text = unicodedata.normalize('NFKC', text)\n",
    "    return \" \".join(text.split())\n",
    "\n",
    "# Page to fetch and parse.\n",
    "url = \"https://jrct.niph.go.jp/latest-detail/jRCT2051240150\"\n",
    "headers = {\n",
    "    \"User-Agent\": \"Mozilla/5.0\"\n",
    "}\n",
    "\n",
    "try:\n",
    "    response = requests.get(url, headers=headers, timeout=10)\n",
    "    # Turn HTTP error statuses into exceptions so they are reported below.\n",
    "    response.raise_for_status()\n",
    "except requests.RequestException as e:\n",
    "    print(f\"URLリクエストに失敗しました: {url} - エラー: {e}\")\n",
    "    # Stop here when the request fails: `response` is unusable past this point.\n",
    "    # SystemExit is raised directly instead of calling exit(), which is an\n",
    "    # interactive-shell helper installed by the site module and is not\n",
    "    # intended for use in program code.\n",
    "    raise SystemExit(1)\n",
    "\n",
    "# Optionally save response.text to a file to inspect it:\n",
    "# with open(\"debug_html.html\", \"w\", encoding=\"utf-8\") as f:\n",
    "#     f.write(response.text)\n",
    "\n",
    "soup = BeautifulSoup(response.text, 'html.parser')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----\n",
      "RAW: '研究の種別'\n",
      "NORMALIZED: '研究の種別'\n",
      "-----\n",
      "RAW: '治験の区分'\n",
      "NORMALIZED: '治験の区分'\n",
      "-----\n",
      "RAW: '初回公表日'\n",
      "NORMALIZED: '初回公表日'\n",
      "-----\n",
      "RAW: '最終公表日'\n",
      "NORMALIZED: '最終公表日'\n",
      "-----\n",
      "RAW: '中止年月日'\n",
      "NORMALIZED: '中止年月日'\n",
      "-----\n",
      "RAW: '観察期間終了日'\n",
      "NORMALIZED: '観察期間終了日'\n",
      "-----\n",
      "RAW: '研究名称'\n",
      "NORMALIZED: '研究名称'\n",
      "-----\n",
      "RAW: '平易な研究名称'\n",
      "NORMALIZED: '平易な研究名称'\n",
      "-----\n",
      "RAW: '研究責任(代表)医師の氏名'\n",
      "NORMALIZED: '研究責任(代表)医師の氏名'\n",
      "-----\n",
      "RAW: '研究責任(代表)医師の所属機関'\n",
      "NORMALIZED: '研究責任(代表)医師の所属機関'\n",
      "-----\n",
      "RAW: '研究・治験の目的'\n",
      "NORMALIZED: '研究・治験の目的'\n",
      "-----\n",
      "RAW: '試験のフェーズ'\n",
      "NORMALIZED: '試験のフェーズ'\n",
      "-----\n",
      "RAW: '対象疾患名'\n",
      "NORMALIZED: '対象疾患名'\n",
      "-----\n",
      "RAW: '進捗状況'\n",
      "NORMALIZED: '進捗状況'\n",
      "-----\n",
      "RAW: '医薬品等の一般名称'\n",
      "NORMALIZED: '医薬品等の一般名称'\n",
      "-----\n",
      "RAW: '販売名'\n",
      "NORMALIZED: '販売名'\n",
      "-----\n",
      "RAW: '認定委員会の名称'\n",
      "NORMALIZED: '認定委員会の名称'\n",
      "-----\n",
      "RAW: '認定番号'\n",
      "NORMALIZED: '認定番号'\n",
      "-----\n",
      "RAW: '                        試験等の名称                   /                       Scientific Title(Acronym)                                  '\n",
      "NORMALIZED: '試験等の名称 / Scientific Title(Acronym)'\n",
      "-----\n",
      "RAW: '                      平易な試験等の名称                 /                     Public Title(Acronym)                                '\n",
      "NORMALIZED: '平易な試験等の名称 / Public Title(Acronym)'\n",
      "-----\n",
      "RAW: '                            科学的な内容の問合せ先                                                    Contact for Scientific Queries                        '\n",
      "NORMALIZED: '科学的な内容の問合せ先 Contact for Scientific Queries'\n",
      "-----\n",
      "RAW: '氏名 / Name'\n",
      "NORMALIZED: '氏名 / Name'\n",
      "-----\n",
      "RAW: '                            e-Rad番号                        '\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '                            所属機関(実施医療機関)                        '\n",
      "NORMALIZED: '所属機関(実施医療機関)'\n",
      "-----\n",
      "RAW: '                            Affiliation                        '\n",
      "NORMALIZED: 'Affiliation'\n",
      "-----\n",
      "RAW: '                            所属部署                        '\n",
      "NORMALIZED: '所属部署'\n",
      "-----\n",
      "RAW: '                            所属機関の郵便番号                        '\n",
      "NORMALIZED: '所属機関の郵便番号'\n",
      "-----\n",
      "RAW: '                        所属機関の住所'\n",
      "NORMALIZED: '所属機関の住所'\n",
      "-----\n",
      "RAW: '                            Address                        '\n",
      "NORMALIZED: 'Address'\n",
      "-----\n",
      "RAW: '                            電話番号                        '\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: '                            電子メールアドレス                        '\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '                            試験に関する問い合わせ先                                                    Contact for Public Queries                        '\n",
      "NORMALIZED: '試験に関する問い合わせ先 Contact for Public Queries'\n",
      "-----\n",
      "RAW: '                            担当者氏名                         /\\n                                                    Name                        '\n",
      "NORMALIZED: '担当者氏名 / Name'\n",
      "-----\n",
      "RAW: '                            担当者所属機関                         /                             Affiliation                        '\n",
      "NORMALIZED: '担当者所属機関 / Affiliation'\n",
      "-----\n",
      "RAW: '                            担当者所属部署                        '\n",
      "NORMALIZED: '担当者所属部署'\n",
      "-----\n",
      "RAW: '                            担当者所属機関の郵便番号                        '\n",
      "NORMALIZED: '担当者所属機関の郵便番号'\n",
      "-----\n",
      "RAW: '\\n                                                    担当者所属機関の住所                         /\\n                                                    Address                                            '\n",
      "NORMALIZED: '担当者所属機関の住所 / Address'\n",
      "-----\n",
      "RAW: '                            電話番号                        '\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: '                            FAX番号                        '\n",
      "NORMALIZED: 'FAX番号'\n",
      "-----\n",
      "RAW: '                            電子メールアドレス                        '\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '\\n                                                    実施医療機関の長の氏名                        '\n",
      "NORMALIZED: '実施医療機関の長の氏名'\n",
      "-----\n",
      "RAW: '\\n                                                    当該試験等に対する管理者の許可の有無                        '\n",
      "NORMALIZED: '当該試験等に対する管理者の許可の有無'\n",
      "-----\n",
      "RAW: '\\n                                                    IRBの承認日                        '\n",
      "NORMALIZED: 'IRBの承認日'\n",
      "-----\n",
      "RAW: '\\n                                                    救急医療に必要な施設又は設備                        '\n",
      "NORMALIZED: '救急医療に必要な施設又は設備'\n",
      "-----\n",
      "RAW: 'データマネジメント担当機関'\n",
      "NORMALIZED: 'データマネジメント担当機関'\n",
      "-----\n",
      "RAW: 'データマネジメント担当責任者'\n",
      "NORMALIZED: 'データマネジメント担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: 'モニタリング担当機関'\n",
      "NORMALIZED: 'モニタリング担当機関'\n",
      "-----\n",
      "RAW: 'モニタリング担当責任者'\n",
      "NORMALIZED: 'モニタリング担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: '監査担当機関'\n",
      "NORMALIZED: '監査担当機関'\n",
      "-----\n",
      "RAW: '監査担当責任者'\n",
      "NORMALIZED: '監査担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: '統計解析担当機関'\n",
      "NORMALIZED: '統計解析担当機関'\n",
      "-----\n",
      "RAW: '統計解析担当責任者'\n",
      "NORMALIZED: '統計解析担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: '研究・開発計画支援担当機関'\n",
      "NORMALIZED: '研究・開発計画支援担当機関'\n",
      "-----\n",
      "RAW: '研究・開発計画支援担当責任者'\n",
      "NORMALIZED: '研究・開発計画支援担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: '調整・管理実務担当機関'\n",
      "NORMALIZED: '調整・管理実務担当機関'\n",
      "-----\n",
      "RAW: '調整・管理実務担当責任者'\n",
      "NORMALIZED: '調整・管理実務担当責任者'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属'\n",
      "NORMALIZED: '所属'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: 'その他の研究を総括する者'\n",
      "NORMALIZED: 'その他の研究を総括する者'\n",
      "-----\n",
      "RAW: '氏名 / Name'\n",
      "NORMALIZED: '氏名 / Name'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属 / Affiliation'\n",
      "NORMALIZED: '所属 / Affiliation'\n",
      "-----\n",
      "RAW: '役職'\n",
      "NORMALIZED: '役職'\n",
      "-----\n",
      "RAW: 'Secondary Sponsor の該当性'\n",
      "NORMALIZED: 'Secondary Sponsor の該当性'\n",
      "-----\n",
      "RAW: '治験責任医師等の連絡先'\n",
      "NORMALIZED: '治験責任医師等の連絡先'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'Name'\n",
      "NORMALIZED: 'Name'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属機関(実施医療機関)'\n",
      "NORMALIZED: '所属機関(実施医療機関)'\n",
      "-----\n",
      "RAW: 'Affiliation'\n",
      "NORMALIZED: 'Affiliation'\n",
      "-----\n",
      "RAW: '所属部署'\n",
      "NORMALIZED: '所属部署'\n",
      "-----\n",
      "RAW: '所属部署の郵便番号'\n",
      "NORMALIZED: '所属部署の郵便番号'\n",
      "-----\n",
      "RAW: '所属機関の住所'\n",
      "NORMALIZED: '所属機関の住所'\n",
      "-----\n",
      "RAW: '電話番号'\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: '電子メールアドレス'\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '研究に関する問合わせ先'\n",
      "NORMALIZED: '研究に関する問合わせ先'\n",
      "-----\n",
      "RAW: '担当者氏名'\n",
      "NORMALIZED: '担当者氏名'\n",
      "-----\n",
      "RAW: '担当者所属機関'\n",
      "NORMALIZED: '担当者所属機関'\n",
      "-----\n",
      "RAW: '担当者所属部署'\n",
      "NORMALIZED: '担当者所属部署'\n",
      "-----\n",
      "RAW: '担当者所属機関の郵便番号'\n",
      "NORMALIZED: '担当者所属機関の郵便番号'\n",
      "-----\n",
      "RAW: '担当者所属機関の住所'\n",
      "NORMALIZED: '担当者所属機関の住所'\n",
      "-----\n",
      "RAW: '電話番号'\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: 'FAX番号'\n",
      "NORMALIZED: 'FAX番号'\n",
      "-----\n",
      "RAW: '電子メールアドレス'\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '実施医療機関の長の氏名'\n",
      "NORMALIZED: '実施医療機関の長の氏名'\n",
      "-----\n",
      "RAW: '管理者の許可の有無'\n",
      "NORMALIZED: '管理者の許可の有無'\n",
      "-----\n",
      "RAW: 'IRBの承認日'\n",
      "NORMALIZED: 'IRBの承認日'\n",
      "-----\n",
      "RAW: '救急医療に必要な施設又は設備'\n",
      "NORMALIZED: '救急医療に必要な施設又は設備'\n",
      "-----\n",
      "RAW: '治験責任医師等の連絡先'\n",
      "NORMALIZED: '治験責任医師等の連絡先'\n",
      "-----\n",
      "RAW: '氏名'\n",
      "NORMALIZED: '氏名'\n",
      "-----\n",
      "RAW: 'Name'\n",
      "NORMALIZED: 'Name'\n",
      "-----\n",
      "RAW: 'e-Rad番号'\n",
      "NORMALIZED: 'e-Rad番号'\n",
      "-----\n",
      "RAW: '所属機関(実施医療機関)'\n",
      "NORMALIZED: '所属機関(実施医療機関)'\n",
      "-----\n",
      "RAW: 'Affiliation'\n",
      "NORMALIZED: 'Affiliation'\n",
      "-----\n",
      "RAW: '所属部署'\n",
      "NORMALIZED: '所属部署'\n",
      "-----\n",
      "RAW: '所属部署の郵便番号'\n",
      "NORMALIZED: '所属部署の郵便番号'\n",
      "-----\n",
      "RAW: '所属機関の住所'\n",
      "NORMALIZED: '所属機関の住所'\n",
      "-----\n",
      "RAW: '電話番号'\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: '電子メールアドレス'\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '研究に関する問合わせ先'\n",
      "NORMALIZED: '研究に関する問合わせ先'\n",
      "-----\n",
      "RAW: '担当者氏名'\n",
      "NORMALIZED: '担当者氏名'\n",
      "-----\n",
      "RAW: '担当者所属機関'\n",
      "NORMALIZED: '担当者所属機関'\n",
      "-----\n",
      "RAW: '担当者所属部署'\n",
      "NORMALIZED: '担当者所属部署'\n",
      "-----\n",
      "RAW: '担当者所属機関の郵便番号'\n",
      "NORMALIZED: '担当者所属機関の郵便番号'\n",
      "-----\n",
      "RAW: '担当者所属機関の住所'\n",
      "NORMALIZED: '担当者所属機関の住所'\n",
      "-----\n",
      "RAW: '電話番号'\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: 'FAX番号'\n",
      "NORMALIZED: 'FAX番号'\n",
      "-----\n",
      "RAW: '電子メールアドレス'\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '実施医療機関の長の氏名'\n",
      "NORMALIZED: '実施医療機関の長の氏名'\n",
      "-----\n",
      "RAW: '管理者の許可の有無'\n",
      "NORMALIZED: '管理者の許可の有無'\n",
      "-----\n",
      "RAW: 'IRBの承認日'\n",
      "NORMALIZED: 'IRBの承認日'\n",
      "-----\n",
      "RAW: '救急医療に必要な施設又は設備'\n",
      "NORMALIZED: '救急医療に必要な施設又は設備'\n",
      "-----\n",
      "RAW: 'ご参考資料 多施設共同研究機関情報'\n",
      "NORMALIZED: 'ご参考資料 多施設共同研究機関情報'\n",
      "-----\n",
      "RAW: '\\n                                    試験等の目的                                '\n",
      "NORMALIZED: '試験等の目的'\n",
      "-----\n",
      "RAW: '\\n                                  試験等のフェーズ               /\\n                                Phase                            '\n",
      "NORMALIZED: '試験等のフェーズ / Phase'\n",
      "-----\n",
      "RAW: '\\n                    症例登録開始予定日 / Date of First Enrollment                '\n",
      "NORMALIZED: '症例登録開始予定日 / Date of First Enrollment'\n",
      "-----\n",
      "RAW: '\\n                    第1症例登録日 / Date of First Enrollment                '\n",
      "NORMALIZED: '第1症例登録日 / Date of First Enrollment'\n",
      "-----\n",
      "RAW: '\\n                 実施期間(開始日)               '\n",
      "NORMALIZED: '実施期間(開始日)'\n",
      "-----\n",
      "RAW: '\\n\\n                          実施期間(終了日)                      \\n'\n",
      "NORMALIZED: '実施期間(終了日)'\n",
      "-----\n",
      "RAW: '\\n                          実施期間(終了日)                      '\n",
      "NORMALIZED: '実施期間(終了日)'\n",
      "-----\n",
      "RAW: '\\n                    実施予定被験者数 / Sample Size                '\n",
      "NORMALIZED: '実施予定被験者数 / Sample Size'\n",
      "-----\n",
      "RAW: '\\n                                    試験等の種類                 /\\n                                    Study Type                                '\n",
      "NORMALIZED: '試験等の種類 / Study Type'\n",
      "-----\n",
      "RAW: '\\n                                    試験等のデザイン                 \\n                                    Study Design                                '\n",
      "NORMALIZED: '試験等のデザイン Study Design'\n",
      "-----\n",
      "RAW: '\\n                                  無作為化                                 /                    allocation                                '\n",
      "NORMALIZED: '無作為化 / allocation'\n",
      "-----\n",
      "RAW: '\\n                                    盲検化                                                     /masking                                '\n",
      "NORMALIZED: '盲検化 /masking'\n",
      "-----\n",
      "RAW: '\\n                                    対照                                  /                    control                                '\n",
      "NORMALIZED: '対照 / control'\n",
      "-----\n",
      "RAW: '\\n                                    割付け                                  /                    assignment                                '\n",
      "NORMALIZED: '割付け / assignment'\n",
      "-----\n",
      "RAW: '\\n                                    研究目的                                  /                    purpose                                '\n",
      "NORMALIZED: '研究目的 / purpose'\n",
      "-----\n",
      "RAW: '\\n                                    プラセボの有無                                '\n",
      "NORMALIZED: 'プラセボの有無'\n",
      "-----\n",
      "RAW: '\\n                                    盲検の有無                                '\n",
      "NORMALIZED: '盲検の有無'\n",
      "-----\n",
      "RAW: '\\n                                    無作為化の有無                            '\n",
      "NORMALIZED: '無作為化の有無'\n",
      "-----\n",
      "RAW: '\\n                                    保険外併用療養費の有無                            '\n",
      "NORMALIZED: '保険外併用療養費の有無'\n",
      "-----\n",
      "RAW: '\\n                                    実施国(日本以外)                 /\\n                                    Countries of Recruitment(Except Japan)                                '\n",
      "NORMALIZED: '実施国(日本以外) / Countries of Recruitment(Except Japan)'\n",
      "-----\n",
      "RAW: '\\n                                    研究対象者の適格基準                                  /                        Key inclusion & exclusion criteria                                  '\n",
      "NORMALIZED: '研究対象者の適格基準 / Key inclusion & exclusion criteria'\n",
      "-----\n",
      "RAW: '\\n                                    主たる選択基準                                   /                     Inclusion Criteria                                  '\n",
      "NORMALIZED: '主たる選択基準 / Inclusion Criteria'\n",
      "-----\n",
      "RAW: '\\n                                    主たる除外基準                                  /                    Exclusion Criteria                                '\n",
      "NORMALIZED: '主たる除外基準 / Exclusion Criteria'\n",
      "-----\n",
      "RAW: '\\n                                    年齢下限                                  /                    Age Minimum                                '\n",
      "NORMALIZED: '年齢下限 / Age Minimum'\n",
      "-----\n",
      "RAW: '\\n                                    年齢上限                                  /                    Age Maximum                                '\n",
      "NORMALIZED: '年齢上限 / Age Maximum'\n",
      "-----\n",
      "RAW: '\\n                                    性別                                  /                    Gender                                '\n",
      "NORMALIZED: '性別 / Gender'\n",
      "-----\n",
      "RAW: '\\n                                    中止基準                                '\n",
      "NORMALIZED: '中止基準'\n",
      "-----\n",
      "RAW: '\\n                                    対象疾患名                                  /                        Health Condition(s) or Problem(s) Studied                                  '\n",
      "NORMALIZED: '対象疾患名 / Health Condition(s) or Problem(s) Studied'\n",
      "-----\n",
      "RAW: '\\n                                    対象疾患コード / Code                                '\n",
      "NORMALIZED: '対象疾患コード / Code'\n",
      "-----\n",
      "RAW: '\\n                                    対象疾患キーワード                                   /                         Keyword                    '\n",
      "NORMALIZED: '対象疾患キーワード / Keyword'\n",
      "-----\n",
      "RAW: '\\n                                    介入の有無                                 '\n",
      "NORMALIZED: '介入の有無'\n",
      "-----\n",
      "RAW: '\\n                                     介入の内容                                  /                        Intervention(s)                                  '\n",
      "NORMALIZED: '介入の内容 / Intervention(s)'\n",
      "-----\n",
      "RAW: '\\n                                   介入コード / Code                               '\n",
      "NORMALIZED: '介入コード / Code'\n",
      "-----\n",
      "RAW: '\\n                                   介入キーワード                                                    /Keyword                                 '\n",
      "NORMALIZED: '介入キーワード /Keyword'\n",
      "-----\n",
      "RAW: '\\n                                   主たる評価項目                                /                       Primary Outcome(s)                                 '\n",
      "NORMALIZED: '主たる評価項目 / Primary Outcome(s)'\n",
      "-----\n",
      "RAW: '\\n                                   副次的な評価項目                                /                       Secondary Outcome(s)                                 '\n",
      "NORMALIZED: '副次的な評価項目 / Secondary Outcome(s)'\n",
      "-----\n",
      "RAW: '医薬品、医療機器、再生医療等製品の別'\n",
      "NORMALIZED: '医薬品、医療機器、再生医療等製品の別'\n",
      "-----\n",
      "RAW: '医薬品医療機器等法における未承認、適応外、承認内の別'\n",
      "NORMALIZED: '医薬品医療機器等法における未承認、適応外、承認内の別'\n",
      "-----\n",
      "RAW: '一般名称等'\n",
      "NORMALIZED: '一般名称等'\n",
      "-----\n",
      "RAW: '医薬品'\n",
      "NORMALIZED: '医薬品'\n",
      "-----\n",
      "RAW: '一般名称'\n",
      "NORMALIZED: '一般名称'\n",
      "-----\n",
      "RAW: '販売名(海外製品の場合は国名も記載すること)'\n",
      "NORMALIZED: '販売名(海外製品の場合は国名も記載すること)'\n",
      "-----\n",
      "RAW: '承認番号'\n",
      "NORMALIZED: '承認番号'\n",
      "-----\n",
      "RAW: '被験薬等提供者'\n",
      "NORMALIZED: '被験薬等提供者'\n",
      "-----\n",
      "RAW: '名称'\n",
      "NORMALIZED: '名称'\n",
      "-----\n",
      "RAW: '所在地'\n",
      "NORMALIZED: '所在地'\n",
      "-----\n",
      "RAW: '監査の実施予定の有無'\n",
      "NORMALIZED: '監査の実施予定の有無'\n",
      "-----\n",
      "RAW: '試験等の進捗状況'\n",
      "NORMALIZED: '試験等の進捗状況'\n",
      "-----\n",
      "RAW: '進捗状況'\n",
      "NORMALIZED: '進捗状況'\n",
      "-----\n",
      "RAW: 'Recruitment status'\n",
      "NORMALIZED: 'Recruitment status'\n",
      "-----\n",
      "RAW: '主たる評価項目に係る研究成果'\n",
      "NORMALIZED: '主たる評価項目に係る研究成果'\n",
      "-----\n",
      "RAW: 'Summary Results (Primary Outcome Results)'\n",
      "NORMALIZED: 'Summary Results (Primary Outcome Results)'\n",
      "-----\n",
      "RAW: '\\n                研究対象者への補償の有無            '\n",
      "NORMALIZED: '研究対象者への補償の有無'\n",
      "-----\n",
      "RAW: '\\n                 補償の内容             '\n",
      "NORMALIZED: '補償の内容'\n",
      "-----\n",
      "RAW: '\\n                保険への加入の有無            '\n",
      "NORMALIZED: '保険への加入の有無'\n",
      "-----\n",
      "RAW: '\\n                保険の補償内容            '\n",
      "NORMALIZED: '保険の補償内容'\n",
      "-----\n",
      "RAW: '\\n                保険以外の補償の内容            '\n",
      "NORMALIZED: '保険以外の補償の内容'\n",
      "-----\n",
      "RAW: '依頼者等の名称'\n",
      "NORMALIZED: '依頼者等の名称'\n",
      "-----\n",
      "RAW: 'Primary Sponsor'\n",
      "NORMALIZED: 'Primary Sponsor'\n",
      "-----\n",
      "RAW: '研究資金提供の有無'\n",
      "NORMALIZED: '研究資金提供の有無'\n",
      "-----\n",
      "RAW: 'Source of Monetary Support'\n",
      "NORMALIZED: 'Source of Monetary Support'\n",
      "-----\n",
      "RAW: '契約締結の有無'\n",
      "NORMALIZED: '契約締結の有無'\n",
      "-----\n",
      "RAW: '契約締結日'\n",
      "NORMALIZED: '契約締結日'\n",
      "-----\n",
      "RAW: '物品提供の有無'\n",
      "NORMALIZED: '物品提供の有無'\n",
      "-----\n",
      "RAW: '物品提供の内容'\n",
      "NORMALIZED: '物品提供の内容'\n",
      "-----\n",
      "RAW: '役務提供の有無'\n",
      "NORMALIZED: '役務提供の有無'\n",
      "-----\n",
      "RAW: '役務提供の内容'\n",
      "NORMALIZED: '役務提供の内容'\n",
      "-----\n",
      "RAW: '研究資金等の提供組織の有無'\n",
      "NORMALIZED: '研究資金等の提供組織の有無'\n",
      "-----\n",
      "RAW: '研究資金等の提供組織名称 / Source of Monetary Support'\n",
      "NORMALIZED: '研究資金等の提供組織名称 / Source of Monetary Support'\n",
      "-----\n",
      "RAW: 'Secondary Sponsorの該当性'\n",
      "NORMALIZED: 'Secondary Sponsorの該当性'\n",
      "-----\n",
      "RAW: '\\n                                            IRBの名称                                        /                           Name of IRB                                    '\n",
      "NORMALIZED: 'IRBの名称 / Name of IRB'\n",
      "-----\n",
      "RAW: '\\n                                            住所                                        /                         Address                                    '\n",
      "NORMALIZED: '住所 / Address'\n",
      "-----\n",
      "RAW: '\\n                                            電話番号                                    '\n",
      "NORMALIZED: '電話番号'\n",
      "-----\n",
      "RAW: '\\n                                            電子メールアドレス                                    '\n",
      "NORMALIZED: '電子メールアドレス'\n",
      "-----\n",
      "RAW: '\\n                    審査受付番号\\n                '\n",
      "NORMALIZED: '審査受付番号'\n",
      "-----\n",
      "RAW: '\\n                                            当該試験等に対する審査結果                                    '\n",
      "NORMALIZED: '当該試験等に対する審査結果'\n",
      "-----\n",
      "RAW: '\\n                  他の臨床研究登録機関発行の研究番号                '\n",
      "NORMALIZED: '他の臨床研究登録機関発行の研究番号'\n",
      "-----\n",
      "RAW: '\\n                  他の臨床研究登録機関の名称                '\n",
      "NORMALIZED: '他の臨床研究登録機関の名称'\n",
      "-----\n",
      "RAW: '\\n                  Issuing Authority                '\n",
      "NORMALIZED: 'Issuing Authority'\n",
      "-----\n",
      "RAW: '\\n                             備考                        '\n",
      "NORMALIZED: '備考'\n",
      "-----\n",
      "RAW: '\\n                             国際共同研究                        '\n",
      "NORMALIZED: '国際共同研究'\n",
      "-----\n",
      "RAW: '\\n                             遺伝子治療等臨床研究に関する指針(平成27年厚生労働省令第344号)の対象となる試験等                        '\n",
      "NORMALIZED: '遺伝子治療等臨床研究に関する指針(平成27年厚生労働省令第344号)の対象となる試験等'\n",
      "-----\n",
      "RAW: '\\n                             遺伝子組換え生物等の使用等の規制による生物の多様性の確保に関する法律(平成15年法律第97号)の対象となる薬物を用いる試験等                        '\n",
      "NORMALIZED: '遺伝子組換え生物等の使用等の規制による生物の多様性の確保に関する法律(平成15年法律第97号)の対象となる薬物を用いる試験等'\n",
      "-----\n",
      "RAW: '\\n                             生物由来製品に指定が見込まれる薬物を用いる試験等                        '\n",
      "NORMALIZED: '生物由来製品に指定が見込まれる薬物を用いる試験等'\n",
      "-----\n",
      "RAW: '\\n                            IPDデータを共有する計画 / Plan to share IPD\\n                        '\n",
      "NORMALIZED: 'IPDデータを共有する計画 / Plan to share IPD'\n",
      "-----\n",
      "RAW: '\\n                            計画の説明 / Plan description\\n                        '\n",
      "NORMALIZED: '計画の説明 / Plan description'\n",
      "-----\n",
      "RAW: '\\n                                     その他1                                 '\n",
      "NORMALIZED: 'その他1'\n",
      "-----\n",
      "RAW: '\\n                                 その他2                             '\n",
      "NORMALIZED: 'その他2'\n",
      "-----\n",
      "RAW: '\\n                                 その他3                             '\n",
      "NORMALIZED: 'その他3'\n",
      "-----\n",
      "RAW: '2-1 その他の添付資料1'\n",
      "NORMALIZED: '2-1 その他の添付資料1'\n",
      "-----\n",
      "RAW: '2-2 その他の添付資料2'\n",
      "NORMALIZED: '2-2 その他の添付資料2'\n"
     ]
    }
   ],
   "source": [
    "# Print every <label> on the page in raw and normalized form so the two can be compared.\n",
    "for label in soup.find_all('label'):\n",
    "    original = label.get_text()\n",
    "    cleaned = normalize_text(original)\n",
    "    print(\"-----\")\n",
    "    print(f\"RAW: {original!r}\")\n",
    "    print(f\"NORMALIZED: {cleaned!r}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FOUND LABEL:\n",
      "NORMALIZED: 対象疾患名\n"
     ]
    }
   ],
   "source": [
    "# Locate the first <label> whose normalized text contains the Japanese field name.\n",
    "target_jp = \"対象疾患名\"\n",
    "found_label = None\n",
    "for candidate in soup.find_all('label'):\n",
    "    candidate_text = normalize_text(candidate.get_text())\n",
    "    if target_jp not in candidate_text:\n",
    "        continue\n",
    "    # Keep the first hit only; later cells read found_label.\n",
    "    found_label = candidate\n",
    "    print(\"FOUND LABEL:\")\n",
    "    print(\"NORMALIZED:\", candidate_text)\n",
    "    break\n",
    "\n",
    "# Report (in Japanese) when no label contained the target text.\n",
    "if not found_label:\n",
    "    print(\"対象疾患名 を含むラベルが見つかりませんでした。\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of TDs: 1\n",
      "TD[0]: 'ラブドイド腫瘍'\n"
     ]
    }
   ],
   "source": [
    "# Walk from the found <label> up to its <th> and <tr>, then list the row's <td> cells.\n",
    "if found_label:\n",
    "    th = found_label.find_parent('th')\n",
    "    if not th:\n",
    "        print(\"Parent <th> not found.\")\n",
    "    else:\n",
    "        tr = th.find_parent('tr')\n",
    "        if not tr:\n",
    "            print(\"Parent <tr> not found.\")\n",
    "        else:\n",
    "            cells = tr.find_all('td')\n",
    "            print(\"Number of TDs:\", len(cells))\n",
    "            for idx, cell in enumerate(cells):\n",
    "                print(f\"TD[{idx}]:\", repr(normalize_text(cell.get_text())))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_label_data(label_text, label_en=None):\n",
    "    \"\"\"Return (jp_data, en_data) from the table row of the first matching <label>.\n",
    "\n",
    "    A label matches when label_text is a substring of its normalized text. Only the\n",
    "    first match is inspected: if it has no enclosing <th>/<tr>, or the row has no\n",
    "    <td>, the result is (None, None). en_data comes from the second <td> and is\n",
    "    filled only when label_en is truthy and that cell exists.\n",
    "    \"\"\"\n",
    "    for label in soup.find_all('label'):\n",
    "        if label_text not in normalize_text(label.get_text()):\n",
    "            continue\n",
    "        # The first hit decides the result: <label> -> <th> -> <tr> -> <td> cells.\n",
    "        header = label.find_parent('th')\n",
    "        row = header.find_parent('tr') if header else None\n",
    "        cells = row.find_all('td') if row else []\n",
    "        if not cells:\n",
    "            return None, None\n",
    "        jp_data = normalize_text(cells[0].get_text())\n",
    "        en_data = None\n",
    "        if label_en and len(cells) > 1:\n",
    "            en_data = normalize_text(cells[1].get_text())\n",
    "        return jp_data, en_data\n",
    "    return None, None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('ラブドイド腫瘍', None)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query the 対象疾患名 row; the returned tuple is displayed as the cell output.\n",
    "extract_label_data(label_text=\"対象疾患名\", label_en=\"Health Condition(s) or Problem(s) Studied\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('ラブドイド腫瘍', None)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same query as the previous cell, run again; the returned tuple is the cell output.\n",
    "extract_label_data(label_text=\"対象疾患名\", label_en=\"Health Condition(s) or Problem(s) Studied\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_label_data(label_text, label_en=None):\n",
    "    \"\"\"Return (jp_data, en_data) from the row of the first <label> matching the search key.\n",
    "\n",
    "    When label_en is given, the key is '<label_text> / <label_en>', so the match is\n",
    "    close to an exact match on the bilingual label instead of any label that merely\n",
    "    contains label_text. When label_en is None the key is label_text alone; the\n",
    "    string concatenation used to raise TypeError in that case.\n",
    "\n",
    "    Returns (None, None) if no label matches, or if the first match has no enclosing\n",
    "    <th>/<tr> or its row has no <td>. en_data is None unless label_en is truthy and\n",
    "    the row has a second <td>.\n",
    "    \"\"\"\n",
    "    # The key does not depend on the label being visited, so build it once.\n",
    "    if label_en is None:\n",
    "        search_key = label_text\n",
    "    else:\n",
    "        search_key = label_text + \" / \" + label_en\n",
    "    for label in soup.find_all('label'):\n",
    "        if search_key not in normalize_text(label.get_text()):\n",
    "            continue\n",
    "        # Matching <label> found: walk <th> -> <tr> and read the row's <td> cells.\n",
    "        th = label.find_parent('th')\n",
    "        if not th:\n",
    "            return None, None\n",
    "        tr = th.find_parent('tr')\n",
    "        if not tr:\n",
    "            return None, None\n",
    "        tds = tr.find_all('td')\n",
    "        if len(tds) == 0:\n",
    "            return None, None\n",
    "\n",
    "        jp_data = normalize_text(tds[0].get_text())\n",
    "        en_data = normalize_text(tds[1].get_text()) if label_en and len(tds) > 1 else None\n",
    "        return jp_data, en_data\n",
    "    return None, None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('ラブドイド腫瘍', 'rhabdoid tumor')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-run the query against the redefined extract_label_data; the tuple is the cell output.\n",
    "extract_label_data(label_text=\"対象疾患名\", label_en=\"Health Condition(s) or Problem(s) Studied\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_label_data_all(label_text, label_en):\n",
    "    \"\"\"Collect (jp_data, en_data) for every <label> containing '<label_text> / <label_en>'.\n",
    "\n",
    "    Each matching label is followed up to its <th> and <tr>; the first two <td>\n",
    "    cells of that row supply the pair. Matches without a <th>, without a <tr>, or\n",
    "    with fewer than two <td> cells are skipped. Returns a list, possibly empty.\n",
    "    \"\"\"\n",
    "    pairs = []\n",
    "    search_key = label_text + \" / \" + label_en\n",
    "    for label in soup.find_all('label'):\n",
    "        if search_key not in normalize_text(label.get_text()):\n",
    "            continue\n",
    "        header = label.find_parent('th')\n",
    "        row = header.find_parent('tr') if header else None\n",
    "        cells = row.find_all('td') if row else []\n",
    "        if len(cells) >= 2:\n",
    "            jp_data = normalize_text(cells[0].get_text())\n",
    "            en_data = normalize_text(cells[1].get_text())\n",
    "            pairs.append((jp_data, en_data))\n",
    "    return pairs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ラブドイド腫瘍', 'rhabdoid tumor')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List every (jp, en) pair found for the 対象疾患名 row; the list is the cell output.\n",
    "extract_label_data_all(label_text=\"対象疾患名\", label_en=\"Health Condition(s) or Problem(s) Studied\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scan all extracted pairs for one whose Japanese and English values contain the given phrases.\n",
    "all_results = extract_label_data_all(label_text=\"対象疾患名\", label_en=\"Health Condition(s) or Problem(s) Studied\")\n",
    "for jp_value, en_value in all_results:\n",
    "    if \"H3 K27M\" not in jp_value or \"Newly Diagnosed\" not in en_value:\n",
    "        continue\n",
    "    print(\"Found desired pair:\", jp_value, en_value)\n",
    "    # To stop at the first hit, break here (or return, if this is moved into a function).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gradio",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}