End of training
Browse files
all_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_loss": 0.56689453125,
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
"eval_wer": 9.899702823179792,
|
8 |
-
"train_loss": 1.
|
9 |
-
"train_runtime":
|
10 |
-
"train_samples_per_second":
|
11 |
-
"train_steps_per_second": 2.
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 646.82,
|
3 |
"eval_loss": 0.56689453125,
|
4 |
+
"eval_runtime": 153.5566,
|
5 |
+
"eval_samples_per_second": 1.771,
|
6 |
+
"eval_steps_per_second": 0.111,
|
7 |
"eval_wer": 9.899702823179792,
|
8 |
+
"train_loss": 1.3152035799893466e-05,
|
9 |
+
"train_runtime": 3841.6705,
|
10 |
+
"train_samples_per_second": 91.627,
|
11 |
+
"train_steps_per_second": 2.863
|
12 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_loss": 0.56689453125,
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
"eval_wer": 9.899702823179792
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 646.82,
|
3 |
"eval_loss": 0.56689453125,
|
4 |
+
"eval_runtime": 153.5566,
|
5 |
+
"eval_samples_per_second": 1.771,
|
6 |
+
"eval_steps_per_second": 0.111,
|
7 |
"eval_wer": 9.899702823179792
|
8 |
}
|
runs/Dec23_11-21-18_129-146-176-120/events.out.tfevents.1671798661.129-146-176-120.862675.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3384bedae216f607575a8ac5d62e171cd6060d79d5164dce041c5d86b8a4129
|
3 |
+
size 358
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 1.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 2.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 646.82,
|
3 |
+
"train_loss": 1.3152035799893466e-05,
|
4 |
+
"train_runtime": 3841.6705,
|
5 |
+
"train_samples_per_second": 91.627,
|
6 |
+
"train_steps_per_second": 2.863
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": 9.899702823179792,
|
3 |
"best_model_checkpoint": "./checkpoint-8000",
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2498,18 +2498,267 @@
|
|
2498 |
"step": 10000
|
2499 |
},
|
2500 |
{
|
2501 |
-
"epoch":
|
2502 |
-
"
|
2503 |
-
"
|
2504 |
-
"
|
2505 |
-
|
2506 |
-
|
2507 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2508 |
}
|
2509 |
],
|
2510 |
-
"max_steps":
|
2511 |
-
"num_train_epochs":
|
2512 |
-
"total_flos": 3.
|
2513 |
"trial_name": null,
|
2514 |
"trial_params": null
|
2515 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 9.899702823179792,
|
3 |
"best_model_checkpoint": "./checkpoint-8000",
|
4 |
+
"epoch": 646.8235294117648,
|
5 |
+
"global_step": 11000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2498 |
"step": 10000
|
2499 |
},
|
2500 |
{
|
2501 |
+
"epoch": 589.47,
|
2502 |
+
"learning_rate": 2.8514285714285715e-07,
|
2503 |
+
"loss": 0.0002,
|
2504 |
+
"step": 10025
|
2505 |
+
},
|
2506 |
+
{
|
2507 |
+
"epoch": 590.94,
|
2508 |
+
"learning_rate": 2.78e-07,
|
2509 |
+
"loss": 0.0002,
|
2510 |
+
"step": 10050
|
2511 |
+
},
|
2512 |
+
{
|
2513 |
+
"epoch": 592.41,
|
2514 |
+
"learning_rate": 2.7085714285714285e-07,
|
2515 |
+
"loss": 0.0001,
|
2516 |
+
"step": 10075
|
2517 |
+
},
|
2518 |
+
{
|
2519 |
+
"epoch": 593.88,
|
2520 |
+
"learning_rate": 2.6371428571428574e-07,
|
2521 |
+
"loss": 0.0001,
|
2522 |
+
"step": 10100
|
2523 |
+
},
|
2524 |
+
{
|
2525 |
+
"epoch": 595.35,
|
2526 |
+
"learning_rate": 2.565714285714286e-07,
|
2527 |
+
"loss": 0.0001,
|
2528 |
+
"step": 10125
|
2529 |
+
},
|
2530 |
+
{
|
2531 |
+
"epoch": 596.82,
|
2532 |
+
"learning_rate": 2.4942857142857144e-07,
|
2533 |
+
"loss": 0.0001,
|
2534 |
+
"step": 10150
|
2535 |
+
},
|
2536 |
+
{
|
2537 |
+
"epoch": 598.29,
|
2538 |
+
"learning_rate": 2.4228571428571427e-07,
|
2539 |
+
"loss": 0.0002,
|
2540 |
+
"step": 10175
|
2541 |
+
},
|
2542 |
+
{
|
2543 |
+
"epoch": 599.76,
|
2544 |
+
"learning_rate": 2.3514285714285715e-07,
|
2545 |
+
"loss": 0.0002,
|
2546 |
+
"step": 10200
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"epoch": 601.24,
|
2550 |
+
"learning_rate": 2.28e-07,
|
2551 |
+
"loss": 0.0001,
|
2552 |
+
"step": 10225
|
2553 |
+
},
|
2554 |
+
{
|
2555 |
+
"epoch": 602.71,
|
2556 |
+
"learning_rate": 2.2085714285714288e-07,
|
2557 |
+
"loss": 0.0001,
|
2558 |
+
"step": 10250
|
2559 |
+
},
|
2560 |
+
{
|
2561 |
+
"epoch": 604.18,
|
2562 |
+
"learning_rate": 2.137142857142857e-07,
|
2563 |
+
"loss": 0.0001,
|
2564 |
+
"step": 10275
|
2565 |
+
},
|
2566 |
+
{
|
2567 |
+
"epoch": 605.65,
|
2568 |
+
"learning_rate": 2.0657142857142856e-07,
|
2569 |
+
"loss": 0.0001,
|
2570 |
+
"step": 10300
|
2571 |
+
},
|
2572 |
+
{
|
2573 |
+
"epoch": 607.12,
|
2574 |
+
"learning_rate": 1.9942857142857144e-07,
|
2575 |
+
"loss": 0.0001,
|
2576 |
+
"step": 10325
|
2577 |
+
},
|
2578 |
+
{
|
2579 |
+
"epoch": 608.59,
|
2580 |
+
"learning_rate": 1.922857142857143e-07,
|
2581 |
+
"loss": 0.0001,
|
2582 |
+
"step": 10350
|
2583 |
+
},
|
2584 |
+
{
|
2585 |
+
"epoch": 610.06,
|
2586 |
+
"learning_rate": 1.8514285714285715e-07,
|
2587 |
+
"loss": 0.0002,
|
2588 |
+
"step": 10375
|
2589 |
+
},
|
2590 |
+
{
|
2591 |
+
"epoch": 611.53,
|
2592 |
+
"learning_rate": 1.78e-07,
|
2593 |
+
"loss": 0.0001,
|
2594 |
+
"step": 10400
|
2595 |
+
},
|
2596 |
+
{
|
2597 |
+
"epoch": 613.0,
|
2598 |
+
"learning_rate": 1.7085714285714286e-07,
|
2599 |
+
"loss": 0.0003,
|
2600 |
+
"step": 10425
|
2601 |
+
},
|
2602 |
+
{
|
2603 |
+
"epoch": 614.47,
|
2604 |
+
"learning_rate": 1.637142857142857e-07,
|
2605 |
+
"loss": 0.0002,
|
2606 |
+
"step": 10450
|
2607 |
+
},
|
2608 |
+
{
|
2609 |
+
"epoch": 615.94,
|
2610 |
+
"learning_rate": 1.5657142857142857e-07,
|
2611 |
+
"loss": 0.0001,
|
2612 |
+
"step": 10475
|
2613 |
+
},
|
2614 |
+
{
|
2615 |
+
"epoch": 617.41,
|
2616 |
+
"learning_rate": 1.4942857142857145e-07,
|
2617 |
+
"loss": 0.0001,
|
2618 |
+
"step": 10500
|
2619 |
+
},
|
2620 |
+
{
|
2621 |
+
"epoch": 618.88,
|
2622 |
+
"learning_rate": 1.422857142857143e-07,
|
2623 |
+
"loss": 0.0002,
|
2624 |
+
"step": 10525
|
2625 |
+
},
|
2626 |
+
{
|
2627 |
+
"epoch": 620.35,
|
2628 |
+
"learning_rate": 1.3514285714285713e-07,
|
2629 |
+
"loss": 0.0001,
|
2630 |
+
"step": 10550
|
2631 |
+
},
|
2632 |
+
{
|
2633 |
+
"epoch": 621.82,
|
2634 |
+
"learning_rate": 1.28e-07,
|
2635 |
+
"loss": 0.0002,
|
2636 |
+
"step": 10575
|
2637 |
+
},
|
2638 |
+
{
|
2639 |
+
"epoch": 623.29,
|
2640 |
+
"learning_rate": 1.2085714285714286e-07,
|
2641 |
+
"loss": 0.0002,
|
2642 |
+
"step": 10600
|
2643 |
+
},
|
2644 |
+
{
|
2645 |
+
"epoch": 624.76,
|
2646 |
+
"learning_rate": 1.1371428571428573e-07,
|
2647 |
+
"loss": 0.0002,
|
2648 |
+
"step": 10625
|
2649 |
+
},
|
2650 |
+
{
|
2651 |
+
"epoch": 626.24,
|
2652 |
+
"learning_rate": 1.0657142857142858e-07,
|
2653 |
+
"loss": 0.0002,
|
2654 |
+
"step": 10650
|
2655 |
+
},
|
2656 |
+
{
|
2657 |
+
"epoch": 627.71,
|
2658 |
+
"learning_rate": 9.942857142857142e-08,
|
2659 |
+
"loss": 0.0002,
|
2660 |
+
"step": 10675
|
2661 |
+
},
|
2662 |
+
{
|
2663 |
+
"epoch": 629.18,
|
2664 |
+
"learning_rate": 9.228571428571429e-08,
|
2665 |
+
"loss": 0.0001,
|
2666 |
+
"step": 10700
|
2667 |
+
},
|
2668 |
+
{
|
2669 |
+
"epoch": 630.65,
|
2670 |
+
"learning_rate": 8.514285714285714e-08,
|
2671 |
+
"loss": 0.0001,
|
2672 |
+
"step": 10725
|
2673 |
+
},
|
2674 |
+
{
|
2675 |
+
"epoch": 632.12,
|
2676 |
+
"learning_rate": 7.8e-08,
|
2677 |
+
"loss": 0.0001,
|
2678 |
+
"step": 10750
|
2679 |
+
},
|
2680 |
+
{
|
2681 |
+
"epoch": 633.59,
|
2682 |
+
"learning_rate": 7.085714285714286e-08,
|
2683 |
+
"loss": 0.0002,
|
2684 |
+
"step": 10775
|
2685 |
+
},
|
2686 |
+
{
|
2687 |
+
"epoch": 635.06,
|
2688 |
+
"learning_rate": 6.371428571428572e-08,
|
2689 |
+
"loss": 0.0001,
|
2690 |
+
"step": 10800
|
2691 |
+
},
|
2692 |
+
{
|
2693 |
+
"epoch": 636.53,
|
2694 |
+
"learning_rate": 5.657142857142857e-08,
|
2695 |
+
"loss": 0.0001,
|
2696 |
+
"step": 10825
|
2697 |
+
},
|
2698 |
+
{
|
2699 |
+
"epoch": 638.0,
|
2700 |
+
"learning_rate": 4.942857142857143e-08,
|
2701 |
+
"loss": 0.0001,
|
2702 |
+
"step": 10850
|
2703 |
+
},
|
2704 |
+
{
|
2705 |
+
"epoch": 639.47,
|
2706 |
+
"learning_rate": 4.2285714285714285e-08,
|
2707 |
+
"loss": 0.0002,
|
2708 |
+
"step": 10875
|
2709 |
+
},
|
2710 |
+
{
|
2711 |
+
"epoch": 640.94,
|
2712 |
+
"learning_rate": 3.514285714285714e-08,
|
2713 |
+
"loss": 0.0001,
|
2714 |
+
"step": 10900
|
2715 |
+
},
|
2716 |
+
{
|
2717 |
+
"epoch": 642.41,
|
2718 |
+
"learning_rate": 2.8000000000000003e-08,
|
2719 |
+
"loss": 0.0001,
|
2720 |
+
"step": 10925
|
2721 |
+
},
|
2722 |
+
{
|
2723 |
+
"epoch": 643.88,
|
2724 |
+
"learning_rate": 2.0857142857142856e-08,
|
2725 |
+
"loss": 0.0001,
|
2726 |
+
"step": 10950
|
2727 |
+
},
|
2728 |
+
{
|
2729 |
+
"epoch": 645.35,
|
2730 |
+
"learning_rate": 1.3714285714285715e-08,
|
2731 |
+
"loss": 0.0001,
|
2732 |
+
"step": 10975
|
2733 |
+
},
|
2734 |
+
{
|
2735 |
+
"epoch": 646.82,
|
2736 |
+
"learning_rate": 6.571428571428572e-09,
|
2737 |
+
"loss": 0.0003,
|
2738 |
+
"step": 11000
|
2739 |
+
},
|
2740 |
+
{
|
2741 |
+
"epoch": 646.82,
|
2742 |
+
"eval_loss": 0.56689453125,
|
2743 |
+
"eval_runtime": 153.4429,
|
2744 |
+
"eval_samples_per_second": 1.773,
|
2745 |
+
"eval_steps_per_second": 0.111,
|
2746 |
+
"eval_wer": 9.946136701337295,
|
2747 |
+
"step": 11000
|
2748 |
+
},
|
2749 |
+
{
|
2750 |
+
"epoch": 646.82,
|
2751 |
+
"step": 11000,
|
2752 |
+
"total_flos": 3.388775639915157e+20,
|
2753 |
+
"train_loss": 1.3152035799893466e-05,
|
2754 |
+
"train_runtime": 3841.6705,
|
2755 |
+
"train_samples_per_second": 91.627,
|
2756 |
+
"train_steps_per_second": 2.863
|
2757 |
}
|
2758 |
],
|
2759 |
+
"max_steps": 11000,
|
2760 |
+
"num_train_epochs": 648,
|
2761 |
+
"total_flos": 3.388775639915157e+20,
|
2762 |
"trial_name": null,
|
2763 |
"trial_params": null
|
2764 |
}
|