ciyazzzk
committed on
Commit · 6d1add3
1 Parent(s): cd75d30
First model version
- config.json +26 -0
- final__ssddata_ruochen_Reasoning_eval_Mix-7B-SFT_3_code.jsonl +31 -0
- generation_config.json +6 -0
- model-00001-of-00003.safetensors +3 -0
- model-00002-of-00003.safetensors +3 -0
- model-00003-of-00003.safetensors +3 -0
- model.safetensors.index.json +298 -0
- special_tokens_map.json +24 -0
- tokenizer.model +3 -0
- tokenizer_config.json +44 -0
- trainer_state.json +1604 -0
config.json
ADDED
@@ -0,0 +1,26 @@
{
  "_name_or_path": "mistralai/Mistral-7B-v0.1",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.42.3",
  "use_cache": false,
  "vocab_size": 32000
}
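The config above is a standard Mistral-7B-v0.1 layout: 32 hidden layers, 32 attention heads with 8 key/value heads (grouped-query attention), a 32768-token position range, and bfloat16 weights split across the three safetensors shards listed in this commit. A minimal sketch of how such a checkpoint is usually loaded with the transformers library; the local path ./Mix-7B-SFT is a hypothetical stand-in for wherever this repo is downloaded:

# Sketch only: load the committed config and sharded weights with transformers.
# "./Mix-7B-SFT" is a hypothetical local path, not a name from this commit.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

config = AutoConfig.from_pretrained("./Mix-7B-SFT")
print(config.model_type, config.num_hidden_layers, config.hidden_size)  # mistral 32 4096

# model.safetensors.index.json maps each weight tensor to one of the three
# model-0000x-of-00003.safetensors shards, so no manual stitching is needed.
model = AutoModelForCausalLM.from_pretrained(
    "./Mix-7B-SFT",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
)
tokenizer = AutoTokenizer.from_pretrained("./Mix-7B-SFT")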
final__ssddata_ruochen_Reasoning_eval_Mix-7B-SFT_3_code.jsonl
ADDED
@@ -0,0 +1,31 @@
{"task_id": "Python/0", "generation": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n given threshold.\n >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n False\n >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n True\n \"\"\"\n\n\n for i in range(len(numbers)):\n for j in range(i + 1, len(numbers)):\n if abs(numbers[i] - numbers[j]) < threshold:\n return True\n return False\n\n", "prompt": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n given threshold.\n >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n False\n >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n True\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n given threshold.\n >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n False\n >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n True\n \"\"\"\n for i in range(len(numbers)):\n for j in range(i + 1, len(numbers)):\n if abs(numbers[i] - numbers[j]) < threshold:\n return True\n return False\n\n\ndef find_closest_pairs(numbers: List[float], threshold: float) -> List[List[float]]:\n \"\"\" Find all pairs of numbers in the list that are closer to each other than the given threshold.\n >>> find_closest_pairs([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n [[1.0, 2.0], [2.0, 2.8], [2.0, 3.0], [2.0, 4.0], [2.0, 5.0]]\n \"\"\"\n closest_pairs = []\n for i in range(len(numbers)):\n for j in range(i + 1, len(numbers)):\n if abs(numbers[i] - numbers[j]) < threshold:\n closest_pairs.append([numbers[i], numbers[j]])\n return closest_pairs\n\n\ndef find_closest_pairs_with_counts(numbers: List[float], threshold: float) -> List[List[float]]:\n \"\"\" Find all pairs of numbers in the list that are closer to each other than the given threshold.\n Also, count how many times each pair appears.\n >>> find_closest_pairs_with_counts([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n [[1.0, 2.0], [2.0, 2.8], [2.0, 3.0], [2.0, 4.0], [2.0, 5.0]]\n \"\"\"\n closest_pairs = []\n counts = {}\n for i in range(len(numbers)):\n for j in range(i + "}
{"task_id": "Python/1", "generation": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n \"\"\" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n separate those group into separate strings and return the list of those.\n Separate groups are balanced (each open brace is properly closed) and not nested within each other\n Ignore any spaces in the input string.\n >>> separate_paren_groups('( ) (( )) (( )( ))')\n ['()', '(())', '(()())']\n \"\"\"\n\n\n # Initialize an empty list to store the separated groups\n separated_groups = []\n # Initialize a stack to keep track of the current group\n stack = []\n # Iterate through the string\n for char in paren_string:\n # If the current character is an open parenthesis, push it to the stack\n if char == '(':\n stack.append(char)\n # If the current character is a close parenthesis, pop the stack until the corresponding open parenthesis is found\n elif char == ')':\n while stack and stack[-1] != '(':\n stack.pop()\n # If the stack is not empty and the top element is an open parenthesis, pop both\n if stack and stack[-1] == '(':\n stack.pop()\n # If the current character is a space, ignore it\n elif char == ' ':\n continue\n # If the current character is not a space, add the current group to the separated groups list\n else:\n # If the current character is not a space, add the current group to the separated groups list\n if stack:\n separated_groups.append(''.join(stack))\n stack = []\n # Add the remaining characters in the stack to the separated groups list\n if stack:\n separated_groups.append(''.join(stack))\n return separated_groups\n\n", "prompt": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n \"\"\" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n separate those group into separate strings and return the list of those.\n Separate groups are balanced (each open brace is properly closed) and not nested within each other\n Ignore any spaces in the input string.\n >>> separate_paren_groups('( ) (( )) (( )( ))')\n ['()', '(())', '(()())']\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n \"\"\" Input to this function is a string containing multiple groups of nested parentheses. 
Your goal is to\n separate those group into separate strings and return the list of those.\n Separate groups are balanced (each open brace is properly closed) and not nested within each other\n Ignore any spaces in the input string.\n >>> separate_paren_groups('( ) (( )) (( )( ))')\n ['()', '(())', '(()())']\n \"\"\"\n # Initialize an empty list to store the separated groups\n separated_groups = []\n # Initialize a stack to keep track of the current group\n stack = []\n # Iterate through the string\n for char in paren_string:\n # If the current character is an open parenthesis, push it to the stack\n if char == '(':\n stack.append(char)\n # If the current character is a close parenthesis, pop the stack until the corresponding open parenthesis is found\n elif char == ')':\n while stack and stack[-1] != '(':\n stack.pop()\n # If the stack is not empty and the top element is an open parenthesis, pop both\n if stack and stack[-1] == '(':\n stack.pop()\n # If the current character is a space, ignore it\n elif char == ' ':\n continue\n # If the current character is not a space, add the current group to the separated groups list\n else:\n # If the current character is not a space, add the current group to the separated groups list\n if stack:\n separated_groups.append(''.join(stack))\n stack = []\n # Add the remaining characters in the stack to the separated groups list\n if stack:\n separated_groups.append(''.join(stack))\n return separated_groups\n\n\n# Test the function with the given example\nparen_string = '( ) (( )) (( )( ))'\nseparated_groups = separate_paren_groups(paren_string)\nprint(separated_groups)"}
{"task_id": "Python/2", "generation": "\n\ndef truncate_number(number: float) -> float:\n \"\"\" Given a positive floating point number, it can be decomposed into\n and integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the decimal part of the number.\n >>> truncate_number(3.5)\n 0.5\n \"\"\"\n\n\n integer_part = int(number)\n decimal_part = number - integer_part\n return decimal_part\n\n", "prompt": "\n\ndef truncate_number(number: float) -> float:\n \"\"\" Given a positive floating point number, it can be decomposed into\n and integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the decimal part of the number.\n >>> truncate_number(3.5)\n 0.5\n \"\"\"\n", "wholecode": "def truncate_number(number: float) -> float:\n \"\"\" Given a positive floating point number, it can be decomposed into\n and integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the decimal part of the number.\n >>> truncate_number(3.5)\n 0.5\n \"\"\"\n integer_part = int(number)\n decimal_part = number - integer_part\n return decimal_part\n\n\ndef truncate_number_with_precision(number: float, precision: int) -> float:\n \"\"\" Given a positive floating point number and a precision, it can be\n decomposed into an integer part (largest integer smaller than given number)\n and decimals (leftover part always smaller than 1).\n\n Return the decimal part of the number with the given precision.\n >>> truncate_number_with_precision(3.5, 2)\n 0.05\n \"\"\"\n integer_part = int(number)\n decimal_part = number - integer_part\n truncated_decimal_part = decimal_part * 10**precision\n return truncated_decimal_part\n\n\ndef round_number_with_precision(number: float, precision: int) -> float:\n \"\"\" Given a positive floating point number and a precision, it can be\n decomposed into an integer part (largest integer smaller than given number)\n and decimals (leftover part always smaller than 1).\n\n Return the rounded decimal part of the number with the given precision.\n >>> round_number_with_precision(3.5, 2)\n 3.50\n \"\"\"\n integer_part = int(number)\n decimal_part = number - integer_part\n truncated_decimal_part = decimal_part * 10**precision\n rounded_decimal_part = round(truncated_decimal_part, precision)\n return rounded_decimal_part\n\n\ndef round_number(number: float) -> float:\n \"\"\" Given a positive floating point number, it can be decomposed into\n an integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the rounded decimal part of the number.\n >>> round_number(3.5)\n 4.0\n \"\"\"\n return round(number)\n\n\ndef round_number_up("}
{"task_id": "Python/3", "generation": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n \"\"\" You're given a list of deposit and withdrawal operations on a bank account that starts with\n zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\n at that point function should return True. Otherwise it should return False.\n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n \"\"\"\n\n\n balance = 0\n for operation in operations:\n balance += operation\n if balance < 0:\n return True\n return False\n\n", "prompt": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n \"\"\" You're given a list of deposit and withdrawal operations on a bank account that starts with\n zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\n at that point function should return True. Otherwise it should return False.\n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n \"\"\" You're given a list of deposit and withdrawal operations on a bank account that starts with\n zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\n at that point function should return True. Otherwise it should return False.\n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n \"\"\"\n balance = 0\n for operation in operations:\n balance += operation\n if balance < 0:\n return True\n return False\n\n\n# Test the function\noperations = [1, 2, -4, 5]\nresult = below_zero(operations)\nprint(result)"}
{"task_id": "Python/4", "generation": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n \"\"\" For a given list of input numbers, calculate Mean Absolute Deviation\n around the mean of this dataset.\n Mean Absolute Deviation is the average absolute difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n \"\"\"\n\n\n mean_value = sum(numbers) / len(numbers)\n mad_value = sum(abs(x - mean_value) for x in numbers) / len(numbers)\n return mad_value\n\n", "prompt": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n \"\"\" For a given list of input numbers, calculate Mean Absolute Deviation\n around the mean of this dataset.\n Mean Absolute Deviation is the average absolute difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n \"\"\" For a given list of input numbers, calculate Mean Absolute Deviation\n around the mean of this dataset.\n Mean Absolute Deviation is the average absolute difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n \"\"\"\n mean_value = sum(numbers) / len(numbers)\n mad_value = sum(abs(x - mean_value) for x in numbers) / len(numbers)\n return mad_value\n\n\n# Test the function\ntest_numbers = [1.0, 2.0, 3.0, 4.0]\nresult = mean_absolute_deviation(test_numbers)\nprint(result)"}
{"task_id": "Python/5", "generation": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n \"\"\" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n >>> intersperse([], 4)\n []\n >>> intersperse([1, 2, 3], 4)\n [1, 4, 2, 4, 3]\n \"\"\"\n\n\n result = []\n for i in range(len(numbers)):\n result.append(numbers[i])\n if i < len(numbers) - 1:\n result.append(delimeter)\n return result\n\n", "prompt": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n \"\"\" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n >>> intersperse([], 4)\n []\n >>> intersperse([1, 2, 3], 4)\n [1, 4, 2, 4, 3]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n \"\"\" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n >>> intersperse([], 4)\n []\n >>> intersperse([1, 2, 3], 4)\n [1, 4, 2, 4, 3]\n \"\"\"\n result = []\n for i in range(len(numbers)):\n result.append(numbers[i])\n if i < len(numbers) - 1:\n result.append(delimeter)\n return result\n\n\ndef intersperse_with_zero(numbers: List[int]) -> List[int]:\n \"\"\" Insert a number '0' between every two consecutive elements of input list `numbers'\n >>> intersperse_with_zero([1, 2, 3])\n [1, 0, 2, 0, 3]\n \"\"\"\n return intersperse(numbers, 0)\n\n\ndef intersperse_with_one(numbers: List[int]) -> List[int]:\n \"\"\" Insert a number '1' between every two consecutive elements of input list `numbers'\n >>> intersperse_with_one([1, 2, 3])\n [1, 1, 2, 1, 3]\n \"\"\"\n return intersperse(numbers, 1)\n\n\ndef intersperse_with_two(numbers: List[int]) -> List[int]:\n \"\"\" Insert a number '2' between every two consecutive elements of input list `numbers'\n >>> intersperse_with_two([1, 2, 3])\n [1, 2, 2, 2, 3]\n \"\"\"\n return intersperse(numbers, 2)\n\n\ndef intersperse_with_three(numbers: List[int]) -> List[int]:\n \"\"\" Insert a number '3' between every two consecutive elements of input list `numbers'\n >>> intersperse_with_three([1, 2, 3])\n [1, 3, 2, 3, 3]\n \"\"\"\n return intersperse(numbers, 3)\n\n\ndef intersperse_with_four(numbers: List[int]) -> List[int]:\n \"\"\" Insert a number '4' between every two consecutive elements of input list `numbers"}
{"task_id": "Python/6", "generation": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n For each of the group, output the deepest level of nesting of parentheses.\n E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n >>> parse_nested_parens('(()()) ((())) () ((())()())')\n [2, 3, 1, 3]\n \"\"\"\n\n\n levels = []\n current_level = 0\n for char in paren_string:\n if char == '(':\n current_level += 1\n elif char == ')':\n current_level -= 1\n levels.append(current_level)\n return levels\n\n", "prompt": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n For each of the group, output the deepest level of nesting of parentheses.\n E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n >>> parse_nested_parens('(()()) ((())) () ((())()())')\n [2, 3, 1, 3]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n For each of the group, output the deepest level of nesting of parentheses.\n E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n >>> parse_nested_parens('(()()) ((())) () ((())()())')\n [2, 3, 1, 3]\n \"\"\"\n levels = []\n current_level = 0\n for char in paren_string:\n if char == '(':\n current_level += 1\n elif char == ')':\n current_level -= 1\n levels.append(current_level)\n return levels\n\n\ndef max_nesting_level(paren_strings: List[str]) -> List[int]:\n \"\"\" Given a list of nested parentheses strings, return the maximum nesting level for each string.\n E.g. [('(()())', '((()))', '()', '((())()())']\n \"\"\"\n max_levels = []\n for paren_string in paren_strings:\n levels = parse_nested_parens(paren_string)\n max_level = max(levels)\n max_levels.append(max_level)\n return max_levels\n\n\n# Testing the function\nparen_strings = [\n '(()())',\n '((()))',\n '()',\n '((())()())'\n]\n\nmax_nesting_levels = max_nesting_level(paren_strings)\nprint(max_nesting_levels)"}
{"task_id": "Python/7", "generation": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring\n >>> filter_by_substring([], 'a')\n []\n >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\n ['abc', 'bacd', 'array']\n \"\"\"\n\n\n return [s for s in strings if substring in s]\n\n", "prompt": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring\n >>> filter_by_substring([], 'a')\n []\n >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\n ['abc', 'bacd', 'array']\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring\n >>> filter_by_substring([], 'a')\n []\n >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\n ['abc', 'bacd', 'array']\n \"\"\"\n return [s for s in strings if substring in s]\n\n\ndef filter_by_substring_and_length(strings: List[str], substring: str, min_length: int) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring and have minimum length\n >>> filter_by_substring_and_length([], 'a', 1)\n []\n >>> filter_by_substring_and_length(['abc', 'bacd', 'cde', 'array'], 'a', 3)\n ['array']\n \"\"\"\n return [s for s in strings if substring in s and len(s) >= min_length]\n\n\ndef filter_by_substring_and_length_and_start_with(strings: List[str], substring: str, min_length: int, start_with: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring, have minimum length and start with given prefix\n >>> filter_by_substring_and_length_and_start_with([], 'a', 1, 'a')\n []\n >>> filter_by_substring_and_length_and_start_with(['abc', 'bacd', 'cde', 'array'], 'a', 3, 'a')\n ['array']\n \"\"\"\n return [s for s in strings if substring in s and len(s) >= min_length and s.startswith(start_with)]\n\n\ndef filter_by_substring_and_length_and_end_with(strings: List[str], substring: str, min_length: int, end_with: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain given substring, have minimum length and end with given suffix\n >>> filter_by_substring_and_length_and_end_with([], 'a', 1, 'a')\n []\n >>> filter_by_substring_and_length_and_end_with(['abc', 'bacd', 'cde', 'array'], 'a', 3, 'y')\n "}
{"task_id": "Python/8", "generation": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n \"\"\" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n Empty sum should be equal to 0 and empty product should be equal to 1.\n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n \"\"\"\n\n\n sum_ = sum(numbers)\n product_ = 1\n for num in numbers:\n product_ *= num\n return sum_, product_\n\n", "prompt": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n \"\"\" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n Empty sum should be equal to 0 and empty product should be equal to 1.\n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n \"\"\"\n", "wholecode": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n \"\"\" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n Empty sum should be equal to 0 and empty product should be equal to 1.\n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n \"\"\"\n sum_ = sum(numbers)\n product_ = 1\n for num in numbers:\n product_ *= num\n return sum_, product_\n\n\ndef find_min_max_avg(numbers: List[int]) -> Tuple[int, int, float]:\n \"\"\" For a given list of integers, return a tuple consisting of a minimum, a maximum, and an average of all the integers in a list.\n Empty average should be equal to 0.\n >>> find_min_max_avg([])\n (0, 0, 0)\n >>> find_min_max_avg([1, 2, 3, 4])\n (1, 4, 2.5)\n \"\"\"\n min_ = min(numbers)\n max_ = max(numbers)\n avg_ = sum(numbers) / len(numbers) if numbers else 0\n return min_, max_, avg_\n\n\ndef find_mode(numbers: List[int]) -> int:\n \"\"\" For a given list of integers, return the mode of the list.\n If there is no mode, return None.\n >>> find_mode([1, 2, 3, 4])\n 2\n >>> find_mode([1, 2, 2, 3, 4])\n 2\n >>> find_mode([1, 2, 3, 4, 4])\n 4\n \"\"\"\n from collections import Counter\n counter = Counter(numbers)\n mode = counter.most_common(1)[0][0]\n return mode\n\n\ndef find_median(numbers: List[int]) -> float:\n \"\"\" For a given list of integers, return the median of the list.\n If the list is empty or has an odd number of elements, return None.\n >>> find_median([1, 2, 3, 4])\n 2.5\n >>> find_median([1, 2, 3, 4, 5])\n 3.0\n \"\"\"\n from statistics import median\n if not"}
{"task_id": "Python/9", "generation": "from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling maximum element found until given moment\n in the sequence.\n >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n [1, 2, 3, 3, 3, 4, 4]\n \"\"\"\n\n\n rolling_max_list = []\n current_max = numbers[0]\n for number in numbers[1:]:\n current_max = max(current_max, number)\n rolling_max_list.append(current_max)\n return rolling_max_list\n\n", "prompt": "from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling maximum element found until given moment\n in the sequence.\n >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n [1, 2, 3, 3, 3, 4, 4]\n \"\"\"\n", "wholecode": "from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling maximum element found until given moment\n in the sequence.\n >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n [1, 2, 3, 3, 3, 4, 4]\n \"\"\"\n rolling_max_list = []\n current_max = numbers[0]\n for number in numbers[1:]:\n current_max = max(current_max, number)\n rolling_max_list.append(current_max)\n return rolling_max_list\n\n\ndef rolling_min(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling minimum element found until given moment\n in the sequence.\n >>> rolling_min([1, 2, 3, 2, 3, 4, 2])\n [1, 1, 2, 2, 2, 2, 2]\n \"\"\"\n rolling_min_list = []\n current_min = numbers[0]\n for number in numbers[1:]:\n current_min = min(current_min, number)\n rolling_min_list.append(current_min)\n return rolling_min_list\n\n\ndef rolling_sum(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling sum found until given moment in the sequence.\n >>> rolling_sum([1, 2, 3, 2, 3, 4, 2])\n [1, 3, 6, 4, 7, 11, 13]\n \"\"\"\n rolling_sum_list = []\n current_sum = 0\n for number in numbers:\n current_sum += number\n rolling_sum_list.append(current_sum)\n return rolling_sum_list\n\n\ndef rolling_diff(numbers: List[int]) -> List[int]:\n \"\"\" From a given list of integers, generate a list of rolling difference found until given moment in the sequence.\n >>> rolling_diff([1, 2, 3, 2, 3, 4, 2])\n [1, 1, 1, 1, 1, 1, 1]\n \"\"\"\n rolling_diff_list = []\n current_diff = 0\n"}
{"task_id": "Python/10", "generation": "\n\ndef is_palindrome(string: str) -> bool:\n \"\"\" Test if given string is a palindrome \"\"\"\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome('')\n ''\n >>> make_palindrome('cat')\n 'catac'\n >>> make_palindrome('cata')\n 'catac'\n \"\"\"\n\n\n if not string:\n return ''\n if is_palindrome(string):\n return string\n for i in range(len(string)):\n if is_palindrome(string[i:]):\n return string[:i] + string[i:][::-1]\n return string\n\n", "prompt": "\n\ndef is_palindrome(string: str) -> bool:\n \"\"\" Test if given string is a palindrome \"\"\"\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome('')\n ''\n >>> make_palindrome('cat')\n 'catac'\n >>> make_palindrome('cata')\n 'catac'\n \"\"\"\n", "wholecode": "def is_palindrome(string: str) -> bool:\n \"\"\" Test if given string is a palindrome \"\"\"\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome('')\n ''\n >>> make_palindrome('cat')\n 'catac'\n >>> make_palindrome('cata')\n 'catac'\n \"\"\"\n if not string:\n return ''\n if is_palindrome(string):\n return string\n for i in range(len(string)):\n if is_palindrome(string[i:]):\n return string[:i] + string[i:][::-1]\n return string\n\n\ndef make_palindrome_all(strings: list[str]) -> list[str]:\n \"\"\" Make all strings in the list palindromes \"\"\"\n return [make_palindrome(s) for s in strings]\n\n\n# Testing\nprint(make_palindrome_all(['', 'cat', 'cata', 'cata']), end='')"}
{"task_id": "Python/11", "generation": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n \"\"\" Input are two strings a and b consisting only of 1s and 0s.\n Perform binary XOR on these inputs and return result also as a string.\n >>> string_xor('010', '110')\n '100'\n \"\"\"\n\n\n return ''.join(chr(ord(a) ^ ord(b)) for a, b in zip(a, b))\n\n", "prompt": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n \"\"\" Input are two strings a and b consisting only of 1s and 0s.\n Perform binary XOR on these inputs and return result also as a string.\n >>> string_xor('010', '110')\n '100'\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n \"\"\" Input are two strings a and b consisting only of 1s and 0s.\n Perform binary XOR on these inputs and return result also as a string.\n >>> string_xor('010', '110')\n '100'\n \"\"\"\n return ''.join(chr(ord(a) ^ ord(b)) for a, b in zip(a, b))\n\n\ndef find_min_xor_subsequence(s: str) -> str:\n \"\"\"\n Given a string s consisting of 1s and 0s, find the minimum length subsequence\n such that the XOR of all characters in the subsequence is 1.\n \"\"\"\n n = len(s)\n min_length = float('inf')\n min_subsequence = ''\n\n for i in range(1, n + 1):\n for j in range(i, n + 1):\n subsequence = s[i - 1:j]\n xor_sum = sum(int(bit) for bit in subsequence)\n if xor_sum == 1:\n length = j - i + 1\n if length < min_length:\n min_length = length\n min_subsequence = subsequence\n\n return min_subsequence\n\n\ndef find_min_xor_subsequence_optimized(s: str) -> str:\n \"\"\"\n Optimized version of the previous function.\n \"\"\"\n n = len(s)\n min_length = float('inf')\n min_subsequence = ''\n\n for i in range(1, n + 1):\n for j in range(i, n + 1):\n subsequence = s[i - 1:j]\n xor_sum = sum(int(bit) for bit in subsequence)\n if xor_sum == 1:\n length = j - i + 1\n if length < min_length:\n min_length = length\n min_subsequence = subsequence\n\n return min_subsequence\n\n\ndef find_min_xor_subsequence_optimized_with_cache(s: str) -> str:\n \"\"\"\n Optimized version with memoization.\n \"\"\"\n n = len(s)\n memo = {}\n\n def find_min_xor_subsequence_util("}
{"task_id": "Python/12", "generation": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n strings of the same length. Return None in case the input list is empty.\n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n\n\n if not strings:\n return None\n return max(strings, key=len)\n\n", "prompt": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n strings of the same length. Return None in case the input list is empty.\n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n", "wholecode": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n strings of the same length. Return None in case the input list is empty.\n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n if not strings:\n return None\n return max(strings, key=len)\n\n\ndef longest_word(sentence: str) -> str:\n \"\"\" Return the longest word in the given sentence. Return the first one in case of multiple\n words of the same length. Return None in case the input string is empty.\n >>> longest_word('')\n\n >>> longest_word('a b c')\n 'a'\n >>> longest_word('a b b c')\n 'b'\n >>> longest_word('a b b c d')\n 'b'\n \"\"\"\n words = sentence.split()\n return longest(words)\n\n\ndef longest_word_in_file(filename: str) -> str:\n \"\"\" Return the longest word in the given file. Return the first one in case of multiple\n words of the same length. Return None in case the input file is empty.\n >>> longest_word_in_file('')\n\n >>> longest_word_in_file('a b c')\n 'a'\n >>> longest_word_in_file('a b b c')\n 'b'\n >>> longest_word_in_file('a b b c d')\n 'b'\n \"\"\"\n with open(filename, 'r') as f:\n sentence = f.read()\n return longest_word(sentence)\n\n\ndef longest_word_in_files(filenames: List[str]) -> str:\n \"\"\" Return the longest word in the given list of files. Return the first one in case of multiple\n words of the same length. Return None in case the input list of files is empty.\n >>> longest_word_in_files([''])\n\n >>> longest_word_in_files(['a b c'])\n 'a'\n >>> longest_word_in_files(['a b b c'])\n 'b'\n >>> longest_word_in_files(['a b b c d'])\n 'b'\n \"\"\"\n if not filenames:\n return None\n longest_word_in_file = longest_word_in_file(filenames[0])"}
{"task_id": "Python/13", "generation": "\n\ndef greatest_common_divisor(a: int, b: int) -> int:\n \"\"\" Return a greatest common divisor of two integers a and b\n >>> greatest_common_divisor(3, 5)\n 1\n >>> greatest_common_divisor(25, 15)\n 5\n \"\"\"\n\n\n while b != 0:\n a, b = b, a % b\n return a\n\n", "prompt": "\n\ndef greatest_common_divisor(a: int, b: int) -> int:\n \"\"\" Return a greatest common divisor of two integers a and b\n >>> greatest_common_divisor(3, 5)\n 1\n >>> greatest_common_divisor(25, 15)\n 5\n \"\"\"\n", "wholecode": "def greatest_common_divisor(a: int, b: int) -> int:\n \"\"\" Return a greatest common divisor of two integers a and b\n >>> greatest_common_divisor(3, 5)\n 1\n >>> greatest_common_divisor(25, 15)\n 5\n \"\"\"\n while b != 0:\n a, b = b, a % b\n return a\n\n\ndef least_common_multiple(a: int, b: int) -> int:\n \"\"\" Return a least common multiple of two integers a and b\n >>> least_common_multiple(3, 5)\n 15\n >>> least_common_multiple(25, 15)\n 150\n \"\"\"\n return abs(a * b) // greatest_common_divisor(a, b)\n\n\ndef is_prime(n: int) -> bool:\n \"\"\" Check if a number n is prime\n >>> is_prime(2)\n True\n >>> is_prime(3)\n True\n >>> is_prime(4)\n False\n \"\"\"\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\n\ndef next_prime(n: int) -> int:\n \"\"\" Return the next prime number after n\n >>> next_prime(2)\n 3\n >>> next_prime(3)\n 5\n >>> next_prime(4)\n 5\n \"\"\"\n while not is_prime(n + 1):\n n += 1\n return n + 1\n\n\ndef prime_factors(n: int) -> list:\n \"\"\" Return a list of prime factors of n\n >>> prime_factors(15)\n [3, 5]\n >>> prime_factors(25)\n [5]\n \"\"\"\n factors = []\n for i in range(2, n + 1):\n if n % i == 0:\n factors.append(i)\n n //= i\n return factors\n\n\ndef is_perfect_power(n:"}
{"task_id": "Python/14", "generation": "from typing import List\n\n\ndef all_prefixes(string: str) -> List[str]:\n \"\"\" Return list of all prefixes from shortest to longest of the input string\n >>> all_prefixes('abc')\n ['a', 'ab', 'abc']\n \"\"\"\n\n\n return [string[:i] for i in range(len(string) + 1)]\n\n", "prompt": "from typing import List\n\n\ndef all_prefixes(string: str) -> List[str]:\n \"\"\" Return list of all prefixes from shortest to longest of the input string\n >>> all_prefixes('abc')\n ['a', 'ab', 'abc']\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef all_prefixes(string: str) -> List[str]:\n \"\"\" Return list of all prefixes from shortest to longest of the input string\n >>> all_prefixes('abc')\n ['a', 'ab', 'abc']\n \"\"\"\n return [string[:i] for i in range(len(string) + 1)]\n\n\ndef all_suffixes(string: str) -> List[str]:\n \"\"\" Return list of all suffixes from shortest to longest of the input string\n >>> all_suffixes('abc')\n ['c', 'bc', 'abc']\n \"\"\"\n return [string[i:] for i in range(len(string))]\n\n\ndef all_substrings(string: str) -> List[str]:\n \"\"\" Return list of all substrings of the input string\n >>> all_substrings('abc')\n ['a', 'b', 'c', 'ab', 'ac', 'bc', 'abc']\n \"\"\"\n return all_prefixes(string) + all_suffixes(string)\n\n\ndef all_permutations(string: str) -> List[str]:\n \"\"\" Return list of all permutations of the input string\n >>> all_permutations('abc')\n ['abc', 'acb', 'bac', 'bca', 'cab', 'cba']\n \"\"\"\n return [''.join(p) for p in permutations(string)]\n\n\ndef permutations(iterable):\n \"\"\" Return all permutations of the input iterable\n >>> list(permutations('abc'))\n [('a', 'b', 'c'), ('a', 'c', 'b'), ('b', 'a', 'c'), ('b', 'c', 'a'), ('c', 'a', 'b'), ('c', 'b', 'a')]\n \"\"\"\n pool = tuple(iterable)\n n = len(pool)\n result = []\n for r in range(n, 0, -1):\n for combination in combinations(pool, r):\n result.append(tuple(combination))\n return result\n\n\ndef combinations(iterable, r):\n \"\"\" Return all combinations of the input iterable of length r\n >>> list(combinations('abc', 2))\n [('a', 'b'), ('a', 'c'), ('b', 'c')]\n "}
{"task_id": "Python/15", "generation": "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n '0'\n >>> string_sequence(5)\n '0 1 2 3 4 5'\n \"\"\"\n\n\n return ' '.join(str(i) for i in range(n + 1))\n\n", "prompt": "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n '0'\n >>> string_sequence(5)\n '0 1 2 3 4 5'\n \"\"\"\n", "wholecode": "def string_sequence(n: int) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n '0'\n >>> string_sequence(5)\n '0 1 2 3 4 5'\n \"\"\"\n return ' '.join(str(i) for i in range(n + 1))\n\n\ndef string_sequence_with_prefix(n: int, prefix: str) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive,\n prefixed with the given prefix.\n >>> string_sequence_with_prefix(0, 'foo')\n 'foo 0'\n >>> string_sequence_with_prefix(5, 'foo')\n 'foo 0 1 2 3 4 5'\n \"\"\"\n return prefix + ' ' + string_sequence(n)\n\n\ndef string_sequence_with_prefix_and_suffix(n: int, prefix: str, suffix: str) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive,\n prefixed with the given prefix and suffixed with the given suffix.\n >>> string_sequence_with_prefix_and_suffix(0, 'foo', 'bar')\n 'foo 0 bar'\n >>> string_sequence_with_prefix_and_suffix(5, 'foo', 'bar')\n 'foo 0 1 2 3 4 5 bar'\n \"\"\"\n return prefix + ' ' + string_sequence(n) + ' ' + suffix\n\n\ndef string_sequence_with_prefix_and_suffix_and_delimiter(n: int, prefix: str, suffix: str, delimiter: str) -> str:\n \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive,\n prefixed with the given prefix and suffixed with the given suffix,\n with the given delimiter between numbers.\n >>> string_sequence_with_prefix_and_suffix_and_delimiter(0, 'foo', 'bar', '*')\n 'foo 0*bar'\n >>> string_sequence_with_prefix_and_suffix_and_delimiter(5, 'foo', 'bar', '*')\n 'foo 0*1*2*3*4*5*bar'\n \"\"\"\n"}
{"task_id": "Python/16", "generation": "\n\ndef count_distinct_characters(string: str) -> int:\n \"\"\" Given a string, find out how many distinct characters (regardless of case) does it consist of\n >>> count_distinct_characters('xyzXYZ')\n 3\n >>> count_distinct_characters('Jerry')\n 4\n \"\"\"\n\n\n # Convert string to lowercase to ignore case\n string = string.lower()\n # Use a set to count distinct characters\n distinct_characters = set(string)\n return len(distinct_characters)\n\n", "prompt": "\n\ndef count_distinct_characters(string: str) -> int:\n \"\"\" Given a string, find out how many distinct characters (regardless of case) does it consist of\n >>> count_distinct_characters('xyzXYZ')\n 3\n >>> count_distinct_characters('Jerry')\n 4\n \"\"\"\n", "wholecode": "def count_distinct_characters(string: str) -> int:\n \"\"\" Given a string, find out how many distinct characters (regardless of case) does it consist of\n >>> count_distinct_characters('xyzXYZ')\n 3\n >>> count_distinct_characters('Jerry')\n 4\n \"\"\"\n # Convert string to lowercase to ignore case\n string = string.lower()\n # Use a set to count distinct characters\n distinct_characters = set(string)\n return len(distinct_characters)\n\n\n# Test the function\nprint(count_distinct_characters('xyzXYZ'))\nprint(count_distinct_characters('Jerry'))"}
{"task_id": "Python/17", "generation": "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n \"\"\" Input to this function is a string representing musical notes in a special ASCII format.\n Your task is to parse this string and return list of integers corresponding to how many beats does each\n not last.\n\n Here is a legend:\n 'o' - whole note, lasts four beats\n 'o|' - half note, lasts two beats\n '.|' - quater note, lasts one beat\n\n >>> parse_music('o o| .| o| o| .| .| .| .| o o')\n [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n \"\"\"\n\n\n notes = []\n for char in music_string:\n if char == 'o':\n notes.append(4)\n elif char == 'o|':\n notes.append(2)\n elif char == '|':\n notes.append(1)\n return notes\n\n", "prompt": "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n \"\"\" Input to this function is a string representing musical notes in a special ASCII format.\n Your task is to parse this string and return list of integers corresponding to how many beats does each\n not last.\n\n Here is a legend:\n 'o' - whole note, lasts four beats\n 'o|' - half note, lasts two beats\n '.|' - quater note, lasts one beat\n\n >>> parse_music('o o| .| o| o| .| .| .| .| o o')\n [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n \"\"\" Input to this function is a string representing musical notes in a special ASCII format.\n Your task is to parse this string and return list of integers corresponding to how many beats does each\n not last.\n\n Here is a legend:\n 'o' - whole note, lasts four beats\n 'o|' - half note, lasts two beats\n '.|' - quater note, lasts one beat\n\n >>> parse_music('o o| .| o| o| .| .| .| .| o o')\n [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n \"\"\"\n notes = []\n for char in music_string:\n if char == 'o':\n notes.append(4)\n elif char == 'o|':\n notes.append(2)\n elif char == '|':\n notes.append(1)\n return notes\n\n\ndef parse_music_v2(music_string: str) -> List[int]:\n \"\"\" Improved version of the function.\n This function should be more robust and handle any possible input format.\n \"\"\"\n notes = []\n for char in music_string:\n if char == 'o':\n notes.append(4)\n elif char == 'o|':\n notes.append(2)\n elif char == '|':\n notes.append(1)\n elif char == '.':\n notes.append(1)\n else:\n raise ValueError(f\"Unknown character: {char}\")\n return notes\n\n\ndef parse_music_v3(music_string: str) -> List[int]:\n \"\"\" Improved version of the function.\n This function should be more robust and handle any possible input format.\n \"\"\"\n notes = []\n for char in music_string:\n if char == 'o':\n notes.append(4)\n elif char == 'o|':\n notes.append(2)\n elif char == '|':\n notes.append(1)\n elif char == '.':\n notes.append(1)\n else:\n raise ValueError(f\"Unknown character: {char}\")\n return notes\n\n\ndef parse_music_v4(music_string: str) -> List[int]:\n \"\"\" Improved version of the function.\n This function should be more robust and handle any possible input format.\n \"\"\"\n notes = []\n for char in music_string:\n if char == 'o':\n notes.append(4)\n elif char == 'o|':\n notes.append(2)\n elif char == '|':\n notes.append(1)\n elif char == '.':\n notes.append("}
{"task_id": "Python/18", "generation": "\n\ndef how_many_times(string: str, substring: str) -> int:\n \"\"\" Find how many times a given substring can be found in the original string. Count overlaping cases.\n >>> how_many_times('', 'a')\n 0\n >>> how_many_times('aaa', 'a')\n 3\n >>> how_many_times('aaaa', 'aa')\n 3\n \"\"\"\n\n\n count = 0\n while True:\n pos = string.find(substring, count)\n if pos == -1:\n break\n count += 1\n return count\n\n", "prompt": "\n\ndef how_many_times(string: str, substring: str) -> int:\n \"\"\" Find how many times a given substring can be found in the original string. Count overlaping cases.\n >>> how_many_times('', 'a')\n 0\n >>> how_many_times('aaa', 'a')\n 3\n >>> how_many_times('aaaa', 'aa')\n 3\n \"\"\"\n", "wholecode": "def how_many_times(string: str, substring: str) -> int:\n \"\"\" Find how many times a given substring can be found in the original string. Count overlaping cases.\n >>> how_many_times('', 'a')\n 0\n >>> how_many_times('aaa', 'a')\n 3\n >>> how_many_times('aaaa', 'aa')\n 3\n \"\"\"\n count = 0\n while True:\n pos = string.find(substring, count)\n if pos == -1:\n break\n count += 1\n return count\n\n\n# Test the function\nprint(how_many_times('', 'a'))\nprint(how_many_times('aaa', 'a'))\nprint(how_many_times('aaaa', 'aa'))"}
{"task_id": "Python/19", "generation": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n \"\"\" Input is a space-delimited string of numberals from 'zero' to 'nine'.\n Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted from smallest to largest\n >>> sort_numbers('three one five')\n 'one three five'\n \"\"\"\n\n\n numbers = numbers.split()\n sorted_numbers = sorted(numbers)\n return ' '.join(sorted_numbers)\n\n", "prompt": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n \"\"\" Input is a space-delimited string of numberals from 'zero' to 'nine'.\n Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted from smallest to largest\n >>> sort_numbers('three one five')\n 'one three five'\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n \"\"\" Input is a space-delimited string of numberals from 'zero' to 'nine'.\n Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight' and 'nine'.\n Return the string with numbers sorted from smallest to largest\n >>> sort_numbers('three one five')\n 'one three five'\n \"\"\"\n numbers = numbers.split()\n sorted_numbers = sorted(numbers)\n return ' '.join(sorted_numbers)\n\n\ndef sort_numbers_list(numbers: List[str]) -> List[str]:\n \"\"\" Input is a list of numberals from 'zero' to 'nine'.\n Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight' and 'nine'.\n Return the list with numbers sorted from smallest to largest\n >>> sort_numbers_list(['three', 'one', 'five'])\n ['one', 'three', 'five']\n \"\"\"\n sorted_numbers = sorted(numbers)\n return sorted_numbers\n\n\n# Test the functions\ntest_str = \"three one five\"\ntest_list = [\"three\", \"one\", \"five\"]\n\nsorted_str = sort_numbers(test_str)\nsorted_list = sort_numbers_list(test_list)\n\nprint(sorted_str)\nprint(sorted_list)"}
{"task_id": "Python/20", "generation": "from typing import List, Tuple\n\n\ndef find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n (2.0, 2.2)\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n (2.0, 2.0)\n \"\"\"\n\n\n numbers.sort()\n closest_elements = (numbers[0], numbers[-1])\n return closest_elements\n\n", "prompt": "from typing import List, Tuple\n\n\ndef find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n (2.0, 2.2)\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n (2.0, 2.0)\n \"\"\"\n", "wholecode": "from typing import List, Tuple\n\n\ndef find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n (2.0, 2.2)\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n (2.0, 2.0)\n \"\"\"\n numbers.sort()\n closest_elements = (numbers[0], numbers[-1])\n return closest_elements\n\n\ndef find_closest_elements_with_given_difference(numbers: List[float], difference: float) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements_with_given_difference([1.0, 2.0, 3.0, 4.0, 5.0, 2.2], 0.2)\n (2.0, 2.2)\n >>> find_closest_elements_with_given_difference([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.0)\n (2.0, 2.0)\n \"\"\"\n numbers.sort()\n closest_elements = (numbers[0], numbers[-1])\n return closest_elements\n\n\ndef find_closest_elements_with_given_difference_in_range(numbers: List[float], difference: float, min_value: float, max_value: float) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements_with_given_difference_in_range([1.0, 2.0, 3.0, 4.0, 5.0, 2.2], 0.2, 1.0, 3.0)\n (2.0, 2.2)\n >>> find_closest_elements_with_given_difference_in_range([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.0, 1."}
{"task_id": "Python/21", "generation": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n \"\"\" Given list of numbers (of at least two elements), apply a linear transform to that list,\n such that the smallest number will become 0 and the largest will become 1\n >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n [0.0, 0.25, 0.5, 0.75, 1.0]\n \"\"\"\n\n\n min_val = min(numbers)\n max_val = max(numbers)\n return [(x - min_val) / (max_val - min_val) for x in numbers]\n\n", "prompt": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n \"\"\" Given list of numbers (of at least two elements), apply a linear transform to that list,\n such that the smallest number will become 0 and the largest will become 1\n >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n [0.0, 0.25, 0.5, 0.75, 1.0]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n \"\"\" Given list of numbers (of at least two elements), apply a linear transform to that list,\n such that the smallest number will become 0 and the largest will become 1\n >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n [0.0, 0.25, 0.5, 0.75, 1.0]\n \"\"\"\n min_val = min(numbers)\n max_val = max(numbers)\n return [(x - min_val) / (max_val - min_val) for x in numbers]\n\n\ndef rescale_to_unit_test():\n numbers = [1.0, 2.0, 3.0, 4.0, 5.0]\n expected_result = [0.0, 0.25, 0.5, 0.75, 1.0]\n result = rescale_to_unit(numbers)\n return result == expected_result\n\n\nprint(rescale_to_unit_test())"}
{"task_id": "Python/22", "generation": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any]) -> List[int]:\n \"\"\" Filter given list of any python values only for integers\n >>> filter_integers(['a', 3.14, 5])\n [5]\n >>> filter_integers([1, 2, 3, 'abc', {}, []])\n [1, 2, 3]\n \"\"\"\n\n\n return [val for val in values if isinstance(val, int)]\n\n", "prompt": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any]) -> List[int]:\n \"\"\" Filter given list of any python values only for integers\n >>> filter_integers(['a', 3.14, 5])\n [5]\n >>> filter_integers([1, 2, 3, 'abc', {}, []])\n [1, 2, 3]\n \"\"\"\n", "wholecode": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any]) -> List[int]:\n \"\"\" Filter given list of any python values only for integers\n >>> filter_integers(['a', 3.14, 5])\n [5]\n >>> filter_integers([1, 2, 3, 'abc', {}, []])\n [1, 2, 3]\n \"\"\"\n return [val for val in values if isinstance(val, int)]\n\n\ndef filter_even_numbers(values: List[int]) -> List[int]:\n \"\"\" Filter given list of integers only for even numbers\n >>> filter_even_numbers([1, 2, 3, 4, 5, 6, 7, 8, 9])\n [2, 4, 6, 8]\n \"\"\"\n return [val for val in values if val % 2 == 0]\n\n\ndef filter_positive_numbers(values: List[int]) -> List[int]:\n \"\"\" Filter given list of integers only for positive numbers\n >>> filter_positive_numbers([-1, 0, 1, 2, 3, -5, 4])\n [1, 2, 3, 4]\n \"\"\"\n return [val for val in values if val > 0]\n\n\ndef filter_even_positive_numbers(values: List[int]) -> List[int]:\n \"\"\" Filter given list of integers only for even positive numbers\n >>> filter_even_positive_numbers([-1, 0, 1, 2, 3, -5, 4])\n [2, 4]\n \"\"\"\n return [val for val in values if val > 0 and val % 2 == 0]\n\n\ndef filter_even_positive_numbers_with_length(values: List[int], length: int) -> List[int]:\n \"\"\" Filter given list of integers only for even positive numbers with given length\n >>> filter_even_positive_numbers_with_length([-1, 0, 1, 2, 3, -5, 4], 2)\n [2, 4]\n \"\"\"\n return [val for val in values if val > 0 and val % 2 == 0 and val in values[:length]]\n\n\ndef filter_even_positive_numbers_with_length_and_sum(values: List[int], length: int, sum_value: int) -> List"}
{"task_id": "Python/23", "generation": "\n\ndef strlen(string: str) -> int:\n \"\"\" Return length of given string\n >>> strlen('')\n 0\n >>> strlen('abc')\n 3\n \"\"\"\n\n\n return len(string)\n\n", "prompt": "\n\ndef strlen(string: str) -> int:\n \"\"\" Return length of given string\n >>> strlen('')\n 0\n >>> strlen('abc')\n 3\n \"\"\"\n", "wholecode": "def strlen(string: str) -> int:\n \"\"\" Return length of given string\n >>> strlen('')\n 0\n >>> strlen('abc')\n 3\n \"\"\"\n return len(string)\n\n\ndef str_to_int(string: str) -> int:\n \"\"\" Convert string to integer\n >>> str_to_int('123')\n 123\n \"\"\"\n return int(string)\n\n\ndef str_to_float(string: str) -> float:\n \"\"\" Convert string to float\n >>> str_to_float('123.45')\n 123.45\n \"\"\"\n return float(string)\n\n\ndef str_to_bool(string: str) -> bool:\n \"\"\" Convert string to boolean\n >>> str_to_bool('True')\n True\n >>> str_to_bool('False')\n False\n \"\"\"\n return string == 'True'\n\n\ndef str_to_list(string: str) -> list:\n \"\"\" Convert string to list\n >>> str_to_list('[1, 2, 3]')\n [1, 2, 3]\n \"\"\"\n return eval(string)\n\n\ndef str_to_dict(string: str) -> dict:\n \"\"\" Convert string to dictionary\n >>> str_to_dict('{\"a\": 1, \"b\": 2}')\n {'a': 1, 'b': 2}\n \"\"\"\n return eval(string)\n\n\ndef str_to_set(string: str) -> set:\n \"\"\" Convert string to set\n >>> str_to_set('{1, 2, 3}')\n {1, 2, 3}\n \"\"\"\n return set(eval(string))\n\n\ndef str_to_tuple(string: str) -> tuple:\n \"\"\" Convert string to tuple\n >>> str_to_tuple('(1, 2, 3)')\n (1, 2, 3)\n \"\"\"\n return tuple(eval(string))\n\n\ndef str_to_range(string: str) -> range:\n \"\"\" Convert string to range\n >>> str_to_range('range(1, 10)')\n range(1, 10)\n "}
{"task_id": "Python/24", "generation": "\n\ndef largest_divisor(n: int) -> int:\n \"\"\" For a given number n, find the largest number that divides n evenly, smaller than n\n >>> largest_divisor(15)\n 5\n \"\"\"\n\n\n for i in range(n, 0, -1):\n if n % i == 0:\n return i\n\n", "prompt": "\n\ndef largest_divisor(n: int) -> int:\n \"\"\" For a given number n, find the largest number that divides n evenly, smaller than n\n >>> largest_divisor(15)\n 5\n \"\"\"\n", "wholecode": "def largest_divisor(n: int) -> int:\n \"\"\" For a given number n, find the largest number that divides n evenly, smaller than n\n >>> largest_divisor(15)\n 5\n \"\"\"\n for i in range(n, 0, -1):\n if n % i == 0:\n return i\n\n\ndef largest_prime_divisor(n: int) -> int:\n \"\"\" For a given number n, find the largest prime number that divides n evenly, smaller than n\n >>> largest_prime_divisor(15)\n 5\n \"\"\"\n for i in range(n, 1, -1):\n if n % i == 0:\n if is_prime(i):\n return i\n\n\ndef is_prime(n: int) -> bool:\n \"\"\" Check if a number n is prime\n >>> is_prime(15)\n False\n \"\"\"\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\n\ndef largest_prime_divisor_of_prime(n: int) -> int:\n \"\"\" For a given prime number n, find the largest prime number that divides n evenly, smaller than n\n >>> largest_prime_divisor_of_prime(15)\n 5\n \"\"\"\n for i in range(n, 1, -1):\n if n % i == 0:\n if is_prime(i):\n return i\n\n\n# Testing the functions\nprint(largest_divisor(15))\nprint(largest_prime_divisor(15))\nprint(largest_prime_divisor_of_prime(15))"}
{"task_id": "Python/25", "generation": "from typing import List\n\n\ndef factorize(n: int) -> List[int]:\n \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n Input number should be equal to the product of all factors\n >>> factorize(8)\n [2, 2, 2]\n >>> factorize(25)\n [5, 5]\n >>> factorize(70)\n [2, 5, 7]\n \"\"\"\n\n\n factors = []\n for i in range(2, n + 1):\n if n % i == 0:\n count = 0\n while n % i == 0:\n n //= i\n count += 1\n factors.append(i)\n if count > 1:\n factors.append(count)\n return factors\n\n", "prompt": "from typing import List\n\n\ndef factorize(n: int) -> List[int]:\n \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n Input number should be equal to the product of all factors\n >>> factorize(8)\n [2, 2, 2]\n >>> factorize(25)\n [5, 5]\n >>> factorize(70)\n [2, 5, 7]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef factorize(n: int) -> List[int]:\n \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n Input number should be equal to the product of all factors\n >>> factorize(8)\n [2, 2, 2]\n >>> factorize(25)\n [5, 5]\n >>> factorize(70)\n [2, 5, 7]\n \"\"\"\n factors = []\n for i in range(2, n + 1):\n if n % i == 0:\n count = 0\n while n % i == 0:\n n //= i\n count += 1\n factors.append(i)\n if count > 1:\n factors.append(count)\n return factors\n\n\ndef is_prime(n: int) -> bool:\n \"\"\" Check if given number is prime \"\"\"\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\n\ndef find_prime_factors(n: int) -> List[int]:\n \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n Input number should be equal to the product of all factors\n \"\"\"\n factors = factorize(n)\n prime_factors = []\n for factor, count in factors:\n if is_prime(factor):\n prime_factors.append(factor)\n for _ in range(count):\n prime_factors.append(factor)\n return prime_factors\n\n\ndef find_prime_factors_with_counts(n: int) -> List[int]:\n \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n Input number should be equal to the product of all factors\n \"\"\"\n factors = factorize(n)\n prime_factors = []\n for factor, count in factors:\n if is_prime(factor):\n prime_factors.append(factor)\n for _ in range"}
{"task_id": "Python/26", "generation": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n\n\n seen = set()\n result = []\n for number in numbers:\n if number not in seen:\n seen.add(number)\n result.append(number)\n return result\n\n", "prompt": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n seen = set()\n result = []\n for number in numbers:\n if number not in seen:\n seen.add(number)\n result.append(number)\n return result\n\n\ndef remove_duplicates_in_place(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates_in_place([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n seen = set()\n result = []\n for number in numbers:\n if number not in seen:\n seen.add(number)\n result.append(number)\n return result\n\n\ndef remove_duplicates_in_place_with_index(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates_in_place_with_index([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n seen = set()\n result = []\n for i, number in enumerate(numbers):\n if number not in seen:\n seen.add(number)\n result.append(number)\n return result\n\n\ndef remove_duplicates_in_place_with_index_and_count(numbers: List[int]) -> List[int]:\n \"\"\" From a list of integers, remove all elements that occur more than once.\n Keep order of elements left the same as in the input.\n >>> remove_duplicates_in_place_with_index_and_count([1, 2, 3, 2, 4])\n [1, 3, 4]\n \"\"\"\n seen = set()\n result = []\n for i, number in enumerate(numbers):\n if number not in seen:"}
{"task_id": "Python/27", "generation": "\n\ndef flip_case(string: str) -> str:\n \"\"\" For a given string, flip lowercase characters to uppercase and uppercase to lowercase.\n >>> flip_case('Hello')\n 'hELLO'\n \"\"\"\n\n\n return ''.join(c.upper() if c.islower() else c.lower() for c in string)\n\n", "prompt": "\n\ndef flip_case(string: str) -> str:\n \"\"\" For a given string, flip lowercase characters to uppercase and uppercase to lowercase.\n >>> flip_case('Hello')\n 'hELLO'\n \"\"\"\n", "wholecode": "def flip_case(string: str) -> str:\n \"\"\" For a given string, flip lowercase characters to uppercase and uppercase to lowercase.\n >>> flip_case('Hello')\n 'hELLO'\n \"\"\"\n return ''.join(c.upper() if c.islower() else c.lower() for c in string)\n\n\ndef is_palindrome(string: str) -> bool:\n \"\"\" Check if a given string is a palindrome.\n >>> is_palindrome('racecar')\n True\n \"\"\"\n return string == string[::-1]\n\n\ndef is_anagram(string1: str, string2: str) -> bool:\n \"\"\" Check if two given strings are anagrams.\n >>> is_anagram('racecar', 'carace')\n True\n \"\"\"\n return sorted(string1) == sorted(string2)\n\n\ndef is_valid_password(password: str) -> bool:\n \"\"\" Check if a given password is valid.\n A valid password must be at least 8 characters long, contain at least one digit, one uppercase and one lowercase letter.\n >>> is_valid_password('12345678')\n False\n \"\"\"\n if len(password) < 8:\n return False\n if not any(char.isdigit() for char in password):\n return False\n if not any(char.isupper() for char in password):\n return False\n if not any(char.islower() for char in password):\n return False\n return True\n\n\ndef encrypt_password(password: str) -> str:\n \"\"\" Encrypt a given password using a simple substitution cipher.\n >>> encrypt_password('password')\n 'vqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqvqv"}
{"task_id": "Python/28", "generation": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n \"\"\" Concatenate list of strings into a single string\n >>> concatenate([])\n ''\n >>> concatenate(['a', 'b', 'c'])\n 'abc'\n \"\"\"\n\n\n return ''.join(strings)\n\n", "prompt": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n \"\"\" Concatenate list of strings into a single string\n >>> concatenate([])\n ''\n >>> concatenate(['a', 'b', 'c'])\n 'abc'\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n \"\"\" Concatenate list of strings into a single string\n >>> concatenate([])\n ''\n >>> concatenate(['a', 'b', 'c'])\n 'abc'\n \"\"\"\n return ''.join(strings)\n\n\ndef count_vowels(word: str) -> int:\n \"\"\" Count the number of vowels in a word\n >>> count_vowels('apple')\n 3\n >>> count_vowels('banana')\n 3\n \"\"\"\n vowels = 'aeiou'\n count = sum(1 for char in word if char in vowels)\n return count\n\n\ndef count_consonants(word: str) -> int:\n \"\"\" Count the number of consonants in a word\n >>> count_consonants('apple')\n 2\n >>> count_consonants('banana')\n 3\n \"\"\"\n vowels = 'aeiou'\n consonants = 'bcdfghjklmnpqrstvwxyz'\n count = sum(1 for char in word if char in consonants and char not in vowels)\n return count\n\n\ndef count_pairs(word: str) -> int:\n \"\"\" Count the number of pairs of consecutive identical letters in a word\n >>> count_pairs('apple')\n 1\n >>> count_pairs('banana')\n 1\n \"\"\"\n count = 0\n for i in range(len(word) - 1):\n if word[i] == word[i + 1]:\n count += 1\n return count\n\n\ndef count_triples(word: str) -> int:\n \"\"\" Count the number of triplets of consecutive identical letters in a word\n >>> count_triples('apple')\n 0\n >>> count_triples('banana')\n 0\n \"\"\"\n count = 0\n for i in range(len(word) - 2):\n if word[i] == word[i + 1] == word[i + 2]:\n count += 1\n return count\n\n\ndef count_words(text: str, word: str) -> int:\n \"\"\" Count the number of occurrences of a word in a text\n >>> count_words('The quick brown fox j"}
{"task_id": "Python/29", "generation": "from typing import List\n\n\ndef filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that start with a given prefix.\n >>> filter_by_prefix([], 'a')\n []\n >>> filter_by_prefix(['abc', 'bcd', 'cde', 'array'], 'a')\n ['abc', 'array']\n \"\"\"\n\n\n return [s for s in strings if s.startswith(prefix)]\n\n", "prompt": "from typing import List\n\n\ndef filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that start with a given prefix.\n >>> filter_by_prefix([], 'a')\n []\n >>> filter_by_prefix(['abc', 'bcd', 'cde', 'array'], 'a')\n ['abc', 'array']\n \"\"\"\n", "wholecode": "from typing import List\n\n\ndef filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that start with a given prefix.\n >>> filter_by_prefix([], 'a')\n []\n >>> filter_by_prefix(['abc', 'bcd', 'cde', 'array'], 'a')\n ['abc', 'array']\n \"\"\"\n return [s for s in strings if s.startswith(prefix)]\n\n\ndef filter_by_suffix(strings: List[str], suffix: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that end with a given suffix.\n >>> filter_by_suffix([], 'a')\n []\n >>> filter_by_suffix(['abc', 'bcd', 'cde', 'array'], 'y')\n ['array']\n \"\"\"\n return [s for s in strings if s.endswith(suffix)]\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that contain a given substring.\n >>> filter_by_substring([], 'a')\n []\n >>> filter_by_substring(['abc', 'bcd', 'cde', 'array'], 'a')\n ['abc', 'array']\n \"\"\"\n return [s for s in strings if substring in s]\n\n\ndef filter_by_length(strings: List[str], min_length: int, max_length: int) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that are between a given minimum and maximum length.\n >>> filter_by_length([], 1, 1)\n []\n >>> filter_by_length(['abc', 'bcd', 'cde', 'array'], 3, 5)\n ['array']\n \"\"\"\n return [s for s in strings if min_length <= len(s) <= max_length]\n\n\ndef filter_by_case(strings: List[str], case_insensitive: bool) -> List[str]:\n \"\"\" Filter an input list of strings only for ones that match a given case.\n >>> filter_by_case([], True)\n []\n >>> filter_by_case(['abc', 'bcd', 'cde', 'array'], True)\n ['array']\n \"\"\"\n return [s for s in strings if case_insensitive]\n\n\ndef filter_by_range(strings: List["}
{"task_id": "Python/30", "generation": "\n\ndef get_positive(l: list):\n \"\"\"Return only positive numbers in the list.\n >>> get_positive([-1, 2, -4, 5, 6])\n [2, 5, 6]\n >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n [5, 3, 2, 3, 9, 123, 1]\n \"\"\"\n\n\n return [num for num in l if num > 0]\n\n", "prompt": "\n\ndef get_positive(l: list):\n \"\"\"Return only positive numbers in the list.\n >>> get_positive([-1, 2, -4, 5, 6])\n [2, 5, 6]\n >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n [5, 3, 2, 3, 9, 123, 1]\n \"\"\"\n", "wholecode": "def get_positive(l: list):\n \"\"\"Return only positive numbers in the list.\n >>> get_positive([-1, 2, -4, 5, 6])\n [2, 5, 6]\n >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n [5, 3, 2, 3, 9, 123, 1]\n \"\"\"\n return [num for num in l if num > 0]\n\n\ndef get_even(l: list):\n \"\"\"Return only even numbers in the list.\n >>> get_even([1, 2, 3, 4, 5, 6, 7, 8, 9])\n [2, 4, 6, 8]\n >>> get_even([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n [2, 4, 6, 8, 10]\n \"\"\"\n return [num for num in l if num % 2 == 0]\n\n\ndef get_odd(l: list):\n \"\"\"Return only odd numbers in the list.\n >>> get_odd([1, 2, 3, 4, 5, 6, 7, 8, 9])\n [1, 3, 5, 7, 9]\n >>> get_odd([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n [1, 3, 5, 7, 9, 11]\n \"\"\"\n return [num for num in l if num % 2 != 0]\n\n\ndef get_prime(l: list):\n \"\"\"Return only prime numbers in the list.\n >>> get_prime([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n [2, 3, 5, 7]\n >>> get_prime([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,"}
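The JSONL records above all share one schema: task_id names the HumanEval-style problem, prompt is the function signature plus docstring fed to the model, generation is the extracted solution (it repeats the prompt before the completion), and wholecode is the raw continuation before truncation. A minimal reading sketch; the path is the file name from this commit's listing, and everything else is illustrative rather than part of the repo:

import json

# Read the eval records in this file and print each task's completion.
# Field names come from the records above; this script is illustrative.
path = "final__ssddata_ruochen_Reasoning_eval_Mix-7B-SFT_3_code.jsonl"

with open(path, encoding="utf-8") as f:
    for line in f:
        rec = json.loads(line)
        body = rec["generation"]
        # "generation" appears to repeat the prompt, so strip it off
        # to isolate the model's completion.
        if body.startswith(rec["prompt"]):
            body = body[len(rec["prompt"]):]
        print(rec["task_id"], repr(body.strip()[:60]))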
generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.42.3"
}
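generation_config.json holds the decoding defaults that transformers loads alongside the model: generate() falls back to these values (here just the BOS/EOS ids matching config.json) when a call does not override them. A minimal usage sketch; the repo id below is a placeholder, not confirmed by this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "ciyazzzk/Mix-7B-SFT"  # placeholder id for this repository
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

# generate() picks up bos_token_id=1 / eos_token_id=2 from the
# generation_config.json above unless they are overridden per call.
inputs = tokenizer("def strlen(string: str) -> int:", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))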
model-00001-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9942fe83a92ed9718fa7921b58cbe294ba9adcf148d04aae1958d21ce0e65857
size 4943162336
model-00002-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ddc066d663a6a7c7a69e612269fdf36b18c6db86c777ea58cf3a59130fe31147
size 4999819336
model-00003-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b421b2916fa79e41d3a335be68395d3c87d5936a40cac8f8749c4ede9be375b9
size 4540516344
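The three .safetensors entries above are Git LFS pointer files rather than the weights themselves: each records the SHA-256 (oid) and byte size of the real shard that LFS fetches on checkout. A minimal sketch for verifying a downloaded shard against its pointer, using the first shard's values from above; the helper itself is illustrative:

import hashlib

def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
    # Stream the shard and compare its SHA-256 digest and length
    # against the oid/size lines of the LFS pointer file.
    h, seen = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            seen += len(chunk)
    return h.hexdigest() == oid and seen == size

print(matches_lfs_pointer(
    "model-00001-of-00003.safetensors",
    "9942fe83a92ed9718fa7921b58cbe294ba9adcf148d04aae1958d21ce0e65857",
    4943162336,
))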
model.safetensors.index.json
ADDED
@@ -0,0 +1,298 @@
{
  "metadata": {
    "total_size": 14483464192
  },
  "weight_map": {
    "lm_head.weight": "model-00003-of-00003.safetensors",
    "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
    "model.norm.weight": "model-00003-of-00003.safetensors"
  }
}
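The index above is what lets a loader resolve a tensor name to the shard that stores it: metadata.total_size is the summed byte size of all tensors, and weight_map sends each parameter to one of the three files (note that layers 10 and 22 straddle shard boundaries). A minimal sketch of a manual lookup with the safetensors package, assuming the repo files sit in the current directory:

import json
from safetensors import safe_open

# Resolve one tensor through the index, then read just that tensor
# from its shard without loading the others.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.22.self_attn.k_proj.weight"
shard = index["weight_map"][name]  # "model-00002-of-00003.safetensors" per the map above
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
print(shard, tuple(tensor.shape))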
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<unk>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
size 493443
tokenizer_config.json
ADDED
@@ -0,0 +1,44 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 2048,
  "pad_token": "<unk>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
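tokenizer.model (the SentencePiece vocabulary), tokenizer_config.json and special_tokens_map.json together define a LlamaTokenizer with ids 0/1/2 reserved for <unk>/<s>/</s>, <unk> doubling as the padding token, right-side padding, and a 2048-token model_max_length (well below the model's 32768 max_position_embeddings). A minimal loading sketch with a placeholder repo id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ciyazzzk/Mix-7B-SFT")  # placeholder id

# These values are read from the tokenizer files committed above.
print(tok.bos_token, tok.eos_token, tok.pad_token)  # <s> </s> <unk>
print(tok.padding_side, tok.model_max_length)       # right 2048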
trainer_state.json
ADDED
@@ -0,0 +1,1604 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9748743718592965,
  "eval_steps": 200,
  "global_step": 222,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01340033500837521,
      "grad_norm": 8.789632797241211,
      "learning_rate": 8.695652173913044e-07,
      "loss": 0.6247,
      "step": 1
    },
    {
      "epoch": 0.02680067001675042,
      "grad_norm": 9.524643898010254,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 0.6273,
      "step": 2
    },
    {
      "epoch": 0.04020100502512563,
      "grad_norm": 8.73757266998291,
      "learning_rate": 2.6086956521739132e-06,
      "loss": 0.6193,
      "step": 3
    },
    {
      "epoch": 0.05360134003350084,
      "grad_norm": 6.146267890930176,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.5812,
      "step": 4
    },
    {
      "epoch": 0.06700167504187604,
      "grad_norm": 7.021780967712402,
      "learning_rate": 4.347826086956522e-06,
      "loss": 0.5455,
      "step": 5
    },
    {
      "epoch": 0.08040201005025126,
      "grad_norm": 7.42340612411499,
      "learning_rate": 5.2173913043478265e-06,
      "loss": 0.5145,
      "step": 6
    },
    {
      "epoch": 0.09380234505862646,
      "grad_norm": 4.656023025512695,
      "learning_rate": 6.086956521739132e-06,
      "loss": 0.4875,
      "step": 7
    },
    {
      "epoch": 0.10720268006700168,
      "grad_norm": 3.4204678535461426,
      "learning_rate": 6.956521739130435e-06,
      "loss": 0.4797,
      "step": 8
    },
    {
      "epoch": 0.12060301507537688,
      "grad_norm": 3.4276418685913086,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.4667,
      "step": 9
    },
    {
      "epoch": 0.13400335008375208,
      "grad_norm": 2.8007936477661133,
      "learning_rate": 8.695652173913044e-06,
      "loss": 0.453,
      "step": 10
    },
    {
      "epoch": 0.1474036850921273,
      "grad_norm": 2.540774345397949,
      "learning_rate": 9.565217391304349e-06,
      "loss": 0.4417,
      "step": 11
    },
    {
      "epoch": 0.16080402010050251,
      "grad_norm": 2.691410779953003,
      "learning_rate": 1.0434782608695653e-05,
      "loss": 0.45,
      "step": 12
    },
    {
      "epoch": 0.17420435510887772,
      "grad_norm": 2.2960121631622314,
      "learning_rate": 1.1304347826086957e-05,
      "loss": 0.4369,
      "step": 13
    },
    {
      "epoch": 0.18760469011725292,
      "grad_norm": 2.296201229095459,
      "learning_rate": 1.2173913043478263e-05,
      "loss": 0.4267,
      "step": 14
    },
    {
      "epoch": 0.20100502512562815,
      "grad_norm": 2.5100786685943604,
      "learning_rate": 1.3043478260869566e-05,
      "loss": 0.4362,
      "step": 15
    },
    {
      "epoch": 0.21440536013400335,
      "grad_norm": 2.1405630111694336,
      "learning_rate": 1.391304347826087e-05,
      "loss": 0.4166,
      "step": 16
    },
    {
      "epoch": 0.22780569514237856,
      "grad_norm": 2.600360870361328,
      "learning_rate": 1.4782608695652174e-05,
      "loss": 0.4289,
      "step": 17
    },
    {
      "epoch": 0.24120603015075376,
      "grad_norm": 2.8101706504821777,
      "learning_rate": 1.565217391304348e-05,
      "loss": 0.417,
      "step": 18
    },
    {
      "epoch": 0.254606365159129,
      "grad_norm": 2.1962270736694336,
      "learning_rate": 1.6521739130434785e-05,
      "loss": 0.4123,
      "step": 19
    },
    {
      "epoch": 0.26800670016750416,
      "grad_norm": 2.412221670150757,
      "learning_rate": 1.739130434782609e-05,
      "loss": 0.4144,
      "step": 20
    },
    {
      "epoch": 0.2814070351758794,
      "grad_norm": 2.1126928329467773,
      "learning_rate": 1.8260869565217393e-05,
      "loss": 0.4112,
      "step": 21
    },
    {
      "epoch": 0.2948073701842546,
      "grad_norm": 2.676149368286133,
      "learning_rate": 1.9130434782608697e-05,
      "loss": 0.4036,
      "step": 22
    },
    {
      "epoch": 0.3082077051926298,
      "grad_norm": 2.2323713302612305,
      "learning_rate": 2e-05,
      "loss": 0.4132,
      "step": 23
    },
    {
      "epoch": 0.32160804020100503,
      "grad_norm": 2.510111093521118,
      "learning_rate": 1.9998753895176576e-05,
      "loss": 0.4086,
      "step": 24
    },
    {
      "epoch": 0.33500837520938026,
      "grad_norm": 1.944287896156311,
      "learning_rate": 1.999501589126174e-05,
      "loss": 0.3983,
      "step": 25
    },
    {
      "epoch": 0.34840871021775544,
      "grad_norm": 2.390577554702759,
      "learning_rate": 1.9988786919844437e-05,
      "loss": 0.3858,
      "step": 26
    },
    {
      "epoch": 0.36180904522613067,
      "grad_norm": 2.6762897968292236,
      "learning_rate": 1.9980068533314937e-05,
      "loss": 0.4029,
      "step": 27
    },
    {
      "epoch": 0.37520938023450584,
      "grad_norm": 2.2880735397338867,
      "learning_rate": 1.9968862904477936e-05,
      "loss": 0.4051,
      "step": 28
    },
    {
      "epoch": 0.38860971524288107,
      "grad_norm": 1.8628953695297241,
      "learning_rate": 1.995517282601106e-05,
      "loss": 0.3984,
      "step": 29
    },
    {
      "epoch": 0.4020100502512563,
      "grad_norm": 2.0865204334259033,
      "learning_rate": 1.993900170976888e-05,
      "loss": 0.3999,
      "step": 30
    },
    {
      "epoch": 0.4154103852596315,
      "grad_norm": 1.8910424709320068,
      "learning_rate": 1.992035358593258e-05,
      "loss": 0.3881,
      "step": 31
    },
    {
      "epoch": 0.4288107202680067,
      "grad_norm": 1.797484040260315,
      "learning_rate": 1.9899233102005573e-05,
      "loss": 0.3908,
      "step": 32
    },
    {
      "epoch": 0.44221105527638194,
      "grad_norm": 1.8772716522216797,
      "learning_rate": 1.987564552165524e-05,
      "loss": 0.3914,
      "step": 33
    },
    {
      "epoch": 0.4556113902847571,
      "grad_norm": 2.0772998332977295,
      "learning_rate": 1.984959672340111e-05,
      "loss": 0.4001,
      "step": 34
    },
    {
      "epoch": 0.46901172529313234,
      "grad_norm": 1.7790579795837402,
      "learning_rate": 1.9821093199149806e-05,
      "loss": 0.3782,
      "step": 35
    },
    {
      "epoch": 0.4824120603015075,
      "grad_norm": 2.1140856742858887,
      "learning_rate": 1.9790142052577148e-05,
      "loss": 0.3877,
      "step": 36
    },
    {
      "epoch": 0.49581239530988275,
      "grad_norm": 1.9867079257965088,
      "learning_rate": 1.9756750997357738e-05,
      "loss": 0.3942,
      "step": 37
    },
    {
      "epoch": 0.509212730318258,
      "grad_norm": 1.9369703531265259,
      "learning_rate": 1.972092835524257e-05,
      "loss": 0.3953,
      "step": 38
    },
    {
      "epoch": 0.5226130653266332,
      "grad_norm": 1.8987735509872437,
      "learning_rate": 1.9682683053985073e-05,
      "loss": 0.3892,
      "step": 39
    },
    {
      "epoch": 0.5360134003350083,
|
286 |
+
"grad_norm": 2.057279348373413,
|
287 |
+
"learning_rate": 1.9642024625116117e-05,
|
288 |
+
"loss": 0.3953,
|
289 |
+
"step": 40
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.5494137353433836,
|
293 |
+
"grad_norm": 1.8630274534225464,
|
294 |
+
"learning_rate": 1.959896320156857e-05,
|
295 |
+
"loss": 0.3938,
|
296 |
+
"step": 41
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.5628140703517588,
|
300 |
+
"grad_norm": 1.9051440954208374,
|
301 |
+
"learning_rate": 1.955350951515195e-05,
|
302 |
+
"loss": 0.3865,
|
303 |
+
"step": 42
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.576214405360134,
|
307 |
+
"grad_norm": 1.9074325561523438,
|
308 |
+
"learning_rate": 1.950567489387783e-05,
|
309 |
+
"loss": 0.3845,
|
310 |
+
"step": 43
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.5896147403685092,
|
314 |
+
"grad_norm": 1.8396834135055542,
|
315 |
+
"learning_rate": 1.945547125913667e-05,
|
316 |
+
"loss": 0.3881,
|
317 |
+
"step": 44
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.6030150753768844,
|
321 |
+
"grad_norm": 1.882157325744629,
|
322 |
+
"learning_rate": 1.9402911122726756e-05,
|
323 |
+
"loss": 0.3877,
|
324 |
+
"step": 45
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.6164154103852596,
|
328 |
+
"grad_norm": 1.9075357913970947,
|
329 |
+
"learning_rate": 1.9348007583735985e-05,
|
330 |
+
"loss": 0.3879,
|
331 |
+
"step": 46
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.6298157453936348,
|
335 |
+
"grad_norm": 1.7783665657043457,
|
336 |
+
"learning_rate": 1.9290774325277305e-05,
|
337 |
+
"loss": 0.3778,
|
338 |
+
"step": 47
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.6432160804020101,
|
342 |
+
"grad_norm": 1.8143409490585327,
|
343 |
+
"learning_rate": 1.923122561107861e-05,
|
344 |
+
"loss": 0.3787,
|
345 |
+
"step": 48
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.6566164154103853,
|
349 |
+
"grad_norm": 1.833724856376648,
|
350 |
+
"learning_rate": 1.916937628192789e-05,
|
351 |
+
"loss": 0.3779,
|
352 |
+
"step": 49
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 0.6700167504187605,
|
356 |
+
"grad_norm": 2.056809186935425,
|
357 |
+
"learning_rate": 1.9105241751974624e-05,
|
358 |
+
"loss": 0.3861,
|
359 |
+
"step": 50
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 0.6834170854271356,
|
363 |
+
"grad_norm": 1.8272616863250732,
|
364 |
+
"learning_rate": 1.903883800488824e-05,
|
365 |
+
"loss": 0.3783,
|
366 |
+
"step": 51
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 0.6968174204355109,
|
370 |
+
"grad_norm": 2.2034404277801514,
|
371 |
+
"learning_rate": 1.8970181589874637e-05,
|
372 |
+
"loss": 0.3807,
|
373 |
+
"step": 52
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 0.7102177554438861,
|
377 |
+
"grad_norm": 1.6915210485458374,
|
378 |
+
"learning_rate": 1.8899289617551803e-05,
|
379 |
+
"loss": 0.3732,
|
380 |
+
"step": 53
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"epoch": 0.7236180904522613,
|
384 |
+
"grad_norm": 1.9049310684204102,
|
385 |
+
"learning_rate": 1.882617975568547e-05,
|
386 |
+
"loss": 0.382,
|
387 |
+
"step": 54
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 0.7370184254606366,
|
391 |
+
"grad_norm": 1.736574649810791,
|
392 |
+
"learning_rate": 1.875087022478594e-05,
|
393 |
+
"loss": 0.3735,
|
394 |
+
"step": 55
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"epoch": 0.7504187604690117,
|
398 |
+
"grad_norm": 1.87579345703125,
|
399 |
+
"learning_rate": 1.867337979356715e-05,
|
400 |
+
"loss": 0.3785,
|
401 |
+
"step": 56
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"epoch": 0.7638190954773869,
|
405 |
+
"grad_norm": 1.8350921869277954,
|
406 |
+
"learning_rate": 1.8593727774269122e-05,
|
407 |
+
"loss": 0.3698,
|
408 |
+
"step": 57
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"epoch": 0.7772194304857621,
|
412 |
+
"grad_norm": 1.7165268659591675,
|
413 |
+
"learning_rate": 1.851193401784495e-05,
|
414 |
+
"loss": 0.3772,
|
415 |
+
"step": 58
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"epoch": 0.7906197654941374,
|
419 |
+
"grad_norm": 1.7701345682144165,
|
420 |
+
"learning_rate": 1.842801890901351e-05,
|
421 |
+
"loss": 0.3851,
|
422 |
+
"step": 59
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 0.8040201005025126,
|
426 |
+
"grad_norm": 1.6949541568756104,
|
427 |
+
"learning_rate": 1.834200336117918e-05,
|
428 |
+
"loss": 0.3853,
|
429 |
+
"step": 60
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 0.8174204355108877,
|
433 |
+
"grad_norm": 1.6625339984893799,
|
434 |
+
"learning_rate": 1.8253908811219764e-05,
|
435 |
+
"loss": 0.3699,
|
436 |
+
"step": 61
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"epoch": 0.830820770519263,
|
440 |
+
"grad_norm": 1.782524585723877,
|
441 |
+
"learning_rate": 1.8163757214143993e-05,
|
442 |
+
"loss": 0.3718,
|
443 |
+
"step": 62
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"epoch": 0.8442211055276382,
|
447 |
+
"grad_norm": 1.8429073095321655,
|
448 |
+
"learning_rate": 1.8071571037619856e-05,
|
449 |
+
"loss": 0.3737,
|
450 |
+
"step": 63
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"epoch": 0.8576214405360134,
|
454 |
+
"grad_norm": 1.6888030767440796,
|
455 |
+
"learning_rate": 1.7977373256375194e-05,
|
456 |
+
"loss": 0.3796,
|
457 |
+
"step": 64
|
458 |
+
},
|
459 |
+
{
|
460 |
+
"epoch": 0.8710217755443886,
|
461 |
+
"grad_norm": 1.7543060779571533,
|
462 |
+
"learning_rate": 1.7881187346471924e-05,
|
463 |
+
"loss": 0.3811,
|
464 |
+
"step": 65
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 0.8844221105527639,
|
468 |
+
"grad_norm": 1.5395281314849854,
|
469 |
+
"learning_rate": 1.77830372794553e-05,
|
470 |
+
"loss": 0.3813,
|
471 |
+
"step": 66
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"epoch": 0.897822445561139,
|
475 |
+
"grad_norm": 1.7886697053909302,
|
476 |
+
"learning_rate": 1.7682947516379706e-05,
|
477 |
+
"loss": 0.3775,
|
478 |
+
"step": 67
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 0.9112227805695142,
|
482 |
+
"grad_norm": 1.8174864053726196,
|
483 |
+
"learning_rate": 1.7580943001712457e-05,
|
484 |
+
"loss": 0.379,
|
485 |
+
"step": 68
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 0.9246231155778895,
|
489 |
+
"grad_norm": 1.7818920612335205,
|
490 |
+
"learning_rate": 1.7477049157117093e-05,
|
491 |
+
"loss": 0.3736,
|
492 |
+
"step": 69
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 0.9380234505862647,
|
496 |
+
"grad_norm": 1.729671835899353,
|
497 |
+
"learning_rate": 1.737129187511779e-05,
|
498 |
+
"loss": 0.3652,
|
499 |
+
"step": 70
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"epoch": 0.9514237855946399,
|
503 |
+
"grad_norm": 1.6677137613296509,
|
504 |
+
"learning_rate": 1.7263697512646397e-05,
|
505 |
+
"loss": 0.3762,
|
506 |
+
"step": 71
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"epoch": 0.964824120603015,
|
510 |
+
"grad_norm": 1.597731113433838,
|
511 |
+
"learning_rate": 1.7154292884473712e-05,
|
512 |
+
"loss": 0.374,
|
513 |
+
"step": 72
|
514 |
+
},
|
515 |
+
{
|
516 |
+
"epoch": 0.9782244556113903,
|
517 |
+
"grad_norm": 1.757295846939087,
|
518 |
+
"learning_rate": 1.7043105256526723e-05,
|
519 |
+
"loss": 0.3792,
|
520 |
+
"step": 73
|
521 |
+
},
|
522 |
+
{
|
523 |
+
"epoch": 0.9916247906197655,
|
524 |
+
"grad_norm": 1.7294024229049683,
|
525 |
+
"learning_rate": 1.693016233909332e-05,
|
526 |
+
"loss": 0.3765,
|
527 |
+
"step": 74
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 1.0050251256281406,
|
531 |
+
"grad_norm": 1.6741400957107544,
|
532 |
+
"learning_rate": 1.681549227991634e-05,
|
533 |
+
"loss": 0.3694,
|
534 |
+
"step": 75
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 1.018425460636516,
|
538 |
+
"grad_norm": 1.5644919872283936,
|
539 |
+
"learning_rate": 1.6699123657178553e-05,
|
540 |
+
"loss": 0.3538,
|
541 |
+
"step": 76
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"epoch": 1.031825795644891,
|
545 |
+
"grad_norm": 1.651426911354065,
|
546 |
+
"learning_rate": 1.658108547238038e-05,
|
547 |
+
"loss": 0.3468,
|
548 |
+
"step": 77
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 1.0452261306532664,
|
552 |
+
"grad_norm": 1.64875328540802,
|
553 |
+
"learning_rate": 1.64614071431121e-05,
|
554 |
+
"loss": 0.3612,
|
555 |
+
"step": 78
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 1.0586264656616415,
|
559 |
+
"grad_norm": 1.6128103733062744,
|
560 |
+
"learning_rate": 1.634011849572239e-05,
|
561 |
+
"loss": 0.3604,
|
562 |
+
"step": 79
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 1.0720268006700167,
|
566 |
+
"grad_norm": 1.6095118522644043,
|
567 |
+
"learning_rate": 1.6217249757884954e-05,
|
568 |
+
"loss": 0.3586,
|
569 |
+
"step": 80
|
570 |
+
},
|
571 |
+
{
|
572 |
+
"epoch": 1.085427135678392,
|
573 |
+
"grad_norm": 1.8337584733963013,
|
574 |
+
"learning_rate": 1.609283155106517e-05,
|
575 |
+
"loss": 0.3484,
|
576 |
+
"step": 81
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"epoch": 1.0988274706867671,
|
580 |
+
"grad_norm": 1.6831597089767456,
|
581 |
+
"learning_rate": 1.596689488288856e-05,
|
582 |
+
"loss": 0.3576,
|
583 |
+
"step": 82
|
584 |
+
},
|
585 |
+
{
|
586 |
+
"epoch": 1.1122278056951425,
|
587 |
+
"grad_norm": 1.809220314025879,
|
588 |
+
"learning_rate": 1.5839471139413065e-05,
|
589 |
+
"loss": 0.3497,
|
590 |
+
"step": 83
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 1.1256281407035176,
|
594 |
+
"grad_norm": 1.7941969633102417,
|
595 |
+
"learning_rate": 1.571059207730695e-05,
|
596 |
+
"loss": 0.3643,
|
597 |
+
"step": 84
|
598 |
+
},
|
599 |
+
{
|
600 |
+
"epoch": 1.1390284757118927,
|
601 |
+
"grad_norm": 2.1410865783691406,
|
602 |
+
"learning_rate": 1.55802898159344e-05,
|
603 |
+
"loss": 0.3524,
|
604 |
+
"step": 85
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"epoch": 1.152428810720268,
|
608 |
+
"grad_norm": 1.7003251314163208,
|
609 |
+
"learning_rate": 1.5448596829350706e-05,
|
610 |
+
"loss": 0.3514,
|
611 |
+
"step": 86
|
612 |
+
},
|
613 |
+
{
|
614 |
+
"epoch": 1.1658291457286432,
|
615 |
+
"grad_norm": 1.7707059383392334,
|
616 |
+
"learning_rate": 1.5315545938209016e-05,
|
617 |
+
"loss": 0.36,
|
618 |
+
"step": 87
|
619 |
+
},
|
620 |
+
{
|
621 |
+
"epoch": 1.1792294807370185,
|
622 |
+
"grad_norm": 1.5506631135940552,
|
623 |
+
"learning_rate": 1.5181170301580776e-05,
|
624 |
+
"loss": 0.3543,
|
625 |
+
"step": 88
|
626 |
+
},
|
627 |
+
{
|
628 |
+
"epoch": 1.1926298157453936,
|
629 |
+
"grad_norm": 1.9891937971115112,
|
630 |
+
"learning_rate": 1.5045503408691776e-05,
|
631 |
+
"loss": 0.358,
|
632 |
+
"step": 89
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 1.2060301507537687,
|
636 |
+
"grad_norm": 1.6654824018478394,
|
637 |
+
"learning_rate": 1.4908579070575936e-05,
|
638 |
+
"loss": 0.3586,
|
639 |
+
"step": 90
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 1.219430485762144,
|
643 |
+
"grad_norm": 1.7871456146240234,
|
644 |
+
"learning_rate": 1.4770431411648898e-05,
|
645 |
+
"loss": 0.3481,
|
646 |
+
"step": 91
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 1.2328308207705192,
|
650 |
+
"grad_norm": 1.7653170824050903,
|
651 |
+
"learning_rate": 1.4631094861203478e-05,
|
652 |
+
"loss": 0.3579,
|
653 |
+
"step": 92
|
654 |
+
},
|
655 |
+
{
|
656 |
+
"epoch": 1.2462311557788945,
|
657 |
+
"grad_norm": 1.670673131942749,
|
658 |
+
"learning_rate": 1.4490604144829204e-05,
|
659 |
+
"loss": 0.3542,
|
660 |
+
"step": 93
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"epoch": 1.2596314907872697,
|
664 |
+
"grad_norm": 1.6217372417449951,
|
665 |
+
"learning_rate": 1.4348994275757933e-05,
|
666 |
+
"loss": 0.3573,
|
667 |
+
"step": 94
|
668 |
+
},
|
669 |
+
{
|
670 |
+
"epoch": 1.2730318257956448,
|
671 |
+
"grad_norm": 1.6846883296966553,
|
672 |
+
"learning_rate": 1.4206300546137844e-05,
|
673 |
+
"loss": 0.3618,
|
674 |
+
"step": 95
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 1.2864321608040201,
|
678 |
+
"grad_norm": 1.7402368783950806,
|
679 |
+
"learning_rate": 1.4062558518237893e-05,
|
680 |
+
"loss": 0.3568,
|
681 |
+
"step": 96
|
682 |
+
},
|
683 |
+
{
|
684 |
+
"epoch": 1.2998324958123952,
|
685 |
+
"grad_norm": 1.5939064025878906,
|
686 |
+
"learning_rate": 1.3917804015584932e-05,
|
687 |
+
"loss": 0.3522,
|
688 |
+
"step": 97
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"epoch": 1.3132328308207706,
|
692 |
+
"grad_norm": 1.7298028469085693,
|
693 |
+
"learning_rate": 1.3772073114035762e-05,
|
694 |
+
"loss": 0.3489,
|
695 |
+
"step": 98
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 1.3266331658291457,
|
699 |
+
"grad_norm": 1.6836535930633545,
|
700 |
+
"learning_rate": 1.3625402132786247e-05,
|
701 |
+
"loss": 0.3607,
|
702 |
+
"step": 99
|
703 |
+
},
|
704 |
+
{
|
705 |
+
"epoch": 1.3400335008375208,
|
706 |
+
"grad_norm": 1.7039278745651245,
|
707 |
+
"learning_rate": 1.3477827625319826e-05,
|
708 |
+
"loss": 0.3577,
|
709 |
+
"step": 100
|
710 |
+
},
|
711 |
+
{
|
712 |
+
"epoch": 1.3534338358458962,
|
713 |
+
"grad_norm": 1.7519850730895996,
|
714 |
+
"learning_rate": 1.3329386370297615e-05,
|
715 |
+
"loss": 0.3587,
|
716 |
+
"step": 101
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 1.3668341708542713,
|
720 |
+
"grad_norm": 1.6788212060928345,
|
721 |
+
"learning_rate": 1.3180115362392383e-05,
|
722 |
+
"loss": 0.3542,
|
723 |
+
"step": 102
|
724 |
+
},
|
725 |
+
{
|
726 |
+
"epoch": 1.3802345058626466,
|
727 |
+
"grad_norm": 1.7427568435668945,
|
728 |
+
"learning_rate": 1.3030051803068729e-05,
|
729 |
+
"loss": 0.3441,
|
730 |
+
"step": 103
|
731 |
+
},
|
732 |
+
{
|
733 |
+
"epoch": 1.3936348408710217,
|
734 |
+
"grad_norm": 1.7681330442428589,
|
735 |
+
"learning_rate": 1.2879233091311667e-05,
|
736 |
+
"loss": 0.359,
|
737 |
+
"step": 104
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"epoch": 1.4070351758793969,
|
741 |
+
"grad_norm": 1.7159209251403809,
|
742 |
+
"learning_rate": 1.2727696814306034e-05,
|
743 |
+
"loss": 0.3474,
|
744 |
+
"step": 105
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 1.4204355108877722,
|
748 |
+
"grad_norm": 1.589086890220642,
|
749 |
+
"learning_rate": 1.2575480738068971e-05,
|
750 |
+
"loss": 0.3492,
|
751 |
+
"step": 106
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 1.4338358458961473,
|
755 |
+
"grad_norm": 1.6435205936431885,
|
756 |
+
"learning_rate": 1.2422622798037833e-05,
|
757 |
+
"loss": 0.3541,
|
758 |
+
"step": 107
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 1.4472361809045227,
|
762 |
+
"grad_norm": 1.6578630208969116,
|
763 |
+
"learning_rate": 1.2269161089615902e-05,
|
764 |
+
"loss": 0.3538,
|
765 |
+
"step": 108
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"epoch": 1.4606365159128978,
|
769 |
+
"grad_norm": 1.617587924003601,
|
770 |
+
"learning_rate": 1.2115133858678192e-05,
|
771 |
+
"loss": 0.3535,
|
772 |
+
"step": 109
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"epoch": 1.474036850921273,
|
776 |
+
"grad_norm": 1.5076704025268555,
|
777 |
+
"learning_rate": 1.1960579492039783e-05,
|
778 |
+
"loss": 0.3404,
|
779 |
+
"step": 110
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 1.4874371859296482,
|
783 |
+
"grad_norm": 1.6691168546676636,
|
784 |
+
"learning_rate": 1.1805536507889021e-05,
|
785 |
+
"loss": 0.3576,
|
786 |
+
"step": 111
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"epoch": 1.5008375209380236,
|
790 |
+
"grad_norm": 1.7041524648666382,
|
791 |
+
"learning_rate": 1.1650043546187994e-05,
|
792 |
+
"loss": 0.3538,
|
793 |
+
"step": 112
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"epoch": 1.5142378559463987,
|
797 |
+
"grad_norm": 1.6463046073913574,
|
798 |
+
"learning_rate": 1.1494139359042612e-05,
|
799 |
+
"loss": 0.3554,
|
800 |
+
"step": 113
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 1.5276381909547738,
|
804 |
+
"grad_norm": 1.5577502250671387,
|
805 |
+
"learning_rate": 1.1337862801044792e-05,
|
806 |
+
"loss": 0.3637,
|
807 |
+
"step": 114
|
808 |
+
},
|
809 |
+
{
|
810 |
+
"epoch": 1.541038525963149,
|
811 |
+
"grad_norm": 1.7103004455566406,
|
812 |
+
"learning_rate": 1.1181252819589081e-05,
|
813 |
+
"loss": 0.3448,
|
814 |
+
"step": 115
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"epoch": 1.5544388609715243,
|
818 |
+
"grad_norm": 1.7436065673828125,
|
819 |
+
"learning_rate": 1.1024348445166133e-05,
|
820 |
+
"loss": 0.3407,
|
821 |
+
"step": 116
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"epoch": 1.5678391959798996,
|
825 |
+
"grad_norm": 1.5942566394805908,
|
826 |
+
"learning_rate": 1.086718878163551e-05,
|
827 |
+
"loss": 0.3282,
|
828 |
+
"step": 117
|
829 |
+
},
|
830 |
+
{
|
831 |
+
"epoch": 1.5812395309882747,
|
832 |
+
"grad_norm": 1.629876732826233,
|
833 |
+
"learning_rate": 1.070981299648016e-05,
|
834 |
+
"loss": 0.3391,
|
835 |
+
"step": 118
|
836 |
+
},
|
837 |
+
{
|
838 |
+
"epoch": 1.5946398659966499,
|
839 |
+
"grad_norm": 1.6159772872924805,
|
840 |
+
"learning_rate": 1.0552260311045082e-05,
|
841 |
+
"loss": 0.3433,
|
842 |
+
"step": 119
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"epoch": 1.608040201005025,
|
846 |
+
"grad_norm": 1.6075868606567383,
|
847 |
+
"learning_rate": 1.0394569990762528e-05,
|
848 |
+
"loss": 0.3495,
|
849 |
+
"step": 120
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 1.6214405360134003,
|
853 |
+
"grad_norm": 1.539378046989441,
|
854 |
+
"learning_rate": 1.0236781335366239e-05,
|
855 |
+
"loss": 0.353,
|
856 |
+
"step": 121
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 1.6348408710217757,
|
860 |
+
"grad_norm": 1.4946749210357666,
|
861 |
+
"learning_rate": 1.0078933669097135e-05,
|
862 |
+
"loss": 0.3482,
|
863 |
+
"step": 122
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"epoch": 1.6482412060301508,
|
867 |
+
"grad_norm": 1.602623462677002,
|
868 |
+
"learning_rate": 9.92106633090287e-06,
|
869 |
+
"loss": 0.3455,
|
870 |
+
"step": 123
|
871 |
+
},
|
872 |
+
{
|
873 |
+
"epoch": 1.661641541038526,
|
874 |
+
"grad_norm": 1.5885684490203857,
|
875 |
+
"learning_rate": 9.763218664633763e-06,
|
876 |
+
"loss": 0.3401,
|
877 |
+
"step": 124
|
878 |
+
},
|
879 |
+
{
|
880 |
+
"epoch": 1.675041876046901,
|
881 |
+
"grad_norm": 1.535250186920166,
|
882 |
+
"learning_rate": 9.605430009237474e-06,
|
883 |
+
"loss": 0.3453,
|
884 |
+
"step": 125
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 1.6884422110552764,
|
888 |
+
"grad_norm": 1.5518174171447754,
|
889 |
+
"learning_rate": 9.44773968895492e-06,
|
890 |
+
"loss": 0.3464,
|
891 |
+
"step": 126
|
892 |
+
},
|
893 |
+
{
|
894 |
+
"epoch": 1.7018425460636517,
|
895 |
+
"grad_norm": 1.5169888734817505,
|
896 |
+
"learning_rate": 9.290187003519841e-06,
|
897 |
+
"loss": 0.3371,
|
898 |
+
"step": 127
|
899 |
+
},
|
900 |
+
{
|
901 |
+
"epoch": 1.7152428810720268,
|
902 |
+
"grad_norm": 1.8163830041885376,
|
903 |
+
"learning_rate": 9.132811218364494e-06,
|
904 |
+
"loss": 0.3377,
|
905 |
+
"step": 128
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"epoch": 1.728643216080402,
|
909 |
+
"grad_norm": 1.699095606803894,
|
910 |
+
"learning_rate": 8.975651554833869e-06,
|
911 |
+
"loss": 0.3385,
|
912 |
+
"step": 129
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"epoch": 1.742043551088777,
|
916 |
+
"grad_norm": 1.5736947059631348,
|
917 |
+
"learning_rate": 8.81874718041092e-06,
|
918 |
+
"loss": 0.3409,
|
919 |
+
"step": 130
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"epoch": 1.7554438860971524,
|
923 |
+
"grad_norm": 1.5745042562484741,
|
924 |
+
"learning_rate": 8.662137198955211e-06,
|
925 |
+
"loss": 0.3468,
|
926 |
+
"step": 131
|
927 |
+
},
|
928 |
+
{
|
929 |
+
"epoch": 1.7688442211055277,
|
930 |
+
"grad_norm": 1.5846389532089233,
|
931 |
+
"learning_rate": 8.50586064095739e-06,
|
932 |
+
"loss": 0.353,
|
933 |
+
"step": 132
|
934 |
+
},
|
935 |
+
{
|
936 |
+
"epoch": 1.7822445561139029,
|
937 |
+
"grad_norm": 1.8534716367721558,
|
938 |
+
"learning_rate": 8.349956453812009e-06,
|
939 |
+
"loss": 0.3477,
|
940 |
+
"step": 133
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"epoch": 1.795644891122278,
|
944 |
+
"grad_norm": 1.6759575605392456,
|
945 |
+
"learning_rate": 8.194463492110982e-06,
|
946 |
+
"loss": 0.3434,
|
947 |
+
"step": 134
|
948 |
+
},
|
949 |
+
{
|
950 |
+
"epoch": 1.809045226130653,
|
951 |
+
"grad_norm": 1.6878952980041504,
|
952 |
+
"learning_rate": 8.03942050796022e-06,
|
953 |
+
"loss": 0.3446,
|
954 |
+
"step": 135
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"epoch": 1.8224455611390284,
|
958 |
+
"grad_norm": 1.7165716886520386,
|
959 |
+
"learning_rate": 7.884866141321811e-06,
|
960 |
+
"loss": 0.334,
|
961 |
+
"step": 136
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 1.8358458961474038,
|
965 |
+
"grad_norm": 1.56163489818573,
|
966 |
+
"learning_rate": 7.730838910384098e-06,
|
967 |
+
"loss": 0.351,
|
968 |
+
"step": 137
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 1.849246231155779,
|
972 |
+
"grad_norm": 1.5513216257095337,
|
973 |
+
"learning_rate": 7.57737720196217e-06,
|
974 |
+
"loss": 0.3394,
|
975 |
+
"step": 138
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"epoch": 1.862646566164154,
|
979 |
+
"grad_norm": 1.7233706712722778,
|
980 |
+
"learning_rate": 7.424519261931036e-06,
|
981 |
+
"loss": 0.3335,
|
982 |
+
"step": 139
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"epoch": 1.8760469011725294,
|
986 |
+
"grad_norm": 1.634465217590332,
|
987 |
+
"learning_rate": 7.27230318569397e-06,
|
988 |
+
"loss": 0.347,
|
989 |
+
"step": 140
|
990 |
+
},
|
991 |
+
{
|
992 |
+
"epoch": 1.8894472361809045,
|
993 |
+
"grad_norm": 1.5293488502502441,
|
994 |
+
"learning_rate": 7.1207669086883366e-06,
|
995 |
+
"loss": 0.3422,
|
996 |
+
"step": 141
|
997 |
+
},
|
998 |
+
{
|
999 |
+
"epoch": 1.9028475711892798,
|
1000 |
+
"grad_norm": 1.5170069932937622,
|
1001 |
+
"learning_rate": 6.969948196931272e-06,
|
1002 |
+
"loss": 0.3369,
|
1003 |
+
"step": 142
|
1004 |
+
},
|
1005 |
+
{
|
1006 |
+
"epoch": 1.916247906197655,
|
1007 |
+
"grad_norm": 1.6170374155044556,
|
1008 |
+
"learning_rate": 6.819884637607619e-06,
|
1009 |
+
"loss": 0.3397,
|
1010 |
+
"step": 143
|
1011 |
+
},
|
1012 |
+
{
|
1013 |
+
"epoch": 1.92964824120603,
|
1014 |
+
"grad_norm": 1.4933662414550781,
|
1015 |
+
"learning_rate": 6.670613629702391e-06,
|
1016 |
+
"loss": 0.3493,
|
1017 |
+
"step": 144
|
1018 |
+
},
|
1019 |
+
{
|
1020 |
+
"epoch": 1.9430485762144054,
|
1021 |
+
"grad_norm": 1.5872507095336914,
|
1022 |
+
"learning_rate": 6.522172374680177e-06,
|
1023 |
+
"loss": 0.3498,
|
1024 |
+
"step": 145
|
1025 |
+
},
|
1026 |
+
{
|
1027 |
+
"epoch": 1.9564489112227805,
|
1028 |
+
"grad_norm": 1.5772573947906494,
|
1029 |
+
"learning_rate": 6.374597867213756e-06,
|
1030 |
+
"loss": 0.3508,
|
1031 |
+
"step": 146
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"epoch": 1.9698492462311559,
|
1035 |
+
"grad_norm": 1.5273511409759521,
|
1036 |
+
"learning_rate": 6.2279268859642396e-06,
|
1037 |
+
"loss": 0.3477,
|
1038 |
+
"step": 147
|
1039 |
+
},
|
1040 |
+
{
|
1041 |
+
"epoch": 1.983249581239531,
|
1042 |
+
"grad_norm": 1.5936107635498047,
|
1043 |
+
"learning_rate": 6.082195984415069e-06,
|
1044 |
+
"loss": 0.3397,
|
1045 |
+
"step": 148
|
1046 |
+
},
|
1047 |
+
{
|
1048 |
+
"epoch": 1.996649916247906,
|
1049 |
+
"grad_norm": 1.5770097970962524,
|
1050 |
+
"learning_rate": 5.937441481762112e-06,
|
1051 |
+
"loss": 0.3518,
|
1052 |
+
"step": 149
|
1053 |
+
},
|
1054 |
+
{
|
1055 |
+
"epoch": 2.0100502512562812,
|
1056 |
+
"grad_norm": 1.4779218435287476,
|
1057 |
+
"learning_rate": 5.793699453862161e-06,
|
1058 |
+
"loss": 0.3364,
|
1059 |
+
"step": 150
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"epoch": 2.023450586264657,
|
1063 |
+
"grad_norm": 1.4762550592422485,
|
1064 |
+
"learning_rate": 5.651005724242072e-06,
|
1065 |
+
"loss": 0.3265,
|
1066 |
+
"step": 151
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"epoch": 2.036850921273032,
|
1070 |
+
"grad_norm": 1.5051695108413696,
|
1071 |
+
"learning_rate": 5.509395855170798e-06,
|
1072 |
+
"loss": 0.3246,
|
1073 |
+
"step": 152
|
1074 |
+
},
|
1075 |
+
{
|
1076 |
+
"epoch": 2.050251256281407,
|
1077 |
+
"grad_norm": 1.727282166481018,
|
1078 |
+
"learning_rate": 5.368905138796523e-06,
|
1079 |
+
"loss": 0.3227,
|
1080 |
+
"step": 153
|
1081 |
+
},
|
1082 |
+
{
|
1083 |
+
"epoch": 2.063651591289782,
|
1084 |
+
"grad_norm": 1.493577003479004,
|
1085 |
+
"learning_rate": 5.2295685883511086e-06,
|
1086 |
+
"loss": 0.3289,
|
1087 |
+
"step": 154
|
1088 |
+
},
|
1089 |
+
{
|
1090 |
+
"epoch": 2.0770519262981573,
|
1091 |
+
"grad_norm": 1.5364409685134888,
|
1092 |
+
"learning_rate": 5.091420929424065e-06,
|
1093 |
+
"loss": 0.3233,
|
1094 |
+
"step": 155
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"epoch": 2.090452261306533,
|
1098 |
+
"grad_norm": 1.592559814453125,
|
1099 |
+
"learning_rate": 4.954496591308227e-06,
|
1100 |
+
"loss": 0.3308,
|
1101 |
+
"step": 156
|
1102 |
+
},
|
1103 |
+
{
|
1104 |
+
"epoch": 2.103852596314908,
|
1105 |
+
"grad_norm": 1.4885226488113403,
|
1106 |
+
"learning_rate": 4.818829698419225e-06,
|
1107 |
+
"loss": 0.3267,
|
1108 |
+
"step": 157
|
1109 |
+
},
|
1110 |
+
{
|
1111 |
+
"epoch": 2.117252931323283,
|
1112 |
+
"grad_norm": 1.584492564201355,
|
1113 |
+
"learning_rate": 4.684454061790987e-06,
|
1114 |
+
"loss": 0.3344,
|
1115 |
+
"step": 158
|
1116 |
+
},
|
1117 |
+
{
|
1118 |
+
"epoch": 2.130653266331658,
|
1119 |
+
"grad_norm": 1.5516785383224487,
|
1120 |
+
"learning_rate": 4.551403170649299e-06,
|
1121 |
+
"loss": 0.325,
|
1122 |
+
"step": 159
|
1123 |
+
},
|
1124 |
+
{
|
1125 |
+
"epoch": 2.1440536013400333,
|
1126 |
+
"grad_norm": 1.5723347663879395,
|
1127 |
+
"learning_rate": 4.4197101840656e-06,
|
1128 |
+
"loss": 0.3343,
|
1129 |
+
"step": 160
|
1130 |
+
},
|
1131 |
+
{
|
1132 |
+
"epoch": 2.157453936348409,
|
1133 |
+
"grad_norm": 1.56240713596344,
|
1134 |
+
"learning_rate": 4.289407922693053e-06,
|
1135 |
+
"loss": 0.319,
|
1136 |
+
"step": 161
|
1137 |
+
},
|
1138 |
+
{
|
1139 |
+
"epoch": 2.170854271356784,
|
1140 |
+
"grad_norm": 1.4635605812072754,
|
1141 |
+
"learning_rate": 4.1605288605869365e-06,
|
1142 |
+
"loss": 0.3254,
|
1143 |
+
"step": 162
|
1144 |
+
},
|
1145 |
+
{
|
1146 |
+
"epoch": 2.184254606365159,
|
1147 |
+
"grad_norm": 1.489740252494812,
|
1148 |
+
"learning_rate": 4.033105117111441e-06,
|
1149 |
+
"loss": 0.3291,
|
1150 |
+
"step": 163
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"epoch": 2.1976549413735342,
|
1154 |
+
"grad_norm": 1.5082714557647705,
|
1155 |
+
"learning_rate": 3.907168448934836e-06,
|
1156 |
+
"loss": 0.3235,
|
1157 |
+
"step": 164
|
1158 |
+
},
|
1159 |
+
{
|
1160 |
+
"epoch": 2.2110552763819094,
|
1161 |
+
"grad_norm": 1.571510672569275,
|
1162 |
+
"learning_rate": 3.7827502421150497e-06,
|
1163 |
+
"loss": 0.3308,
|
1164 |
+
"step": 165
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 2.224455611390285,
|
1168 |
+
"grad_norm": 1.526476263999939,
|
1169 |
+
"learning_rate": 3.6598815042776135e-06,
|
1170 |
+
"loss": 0.3263,
|
1171 |
+
"step": 166
|
1172 |
+
},
|
1173 |
+
{
|
1174 |
+
"epoch": 2.23785594639866,
|
1175 |
+
"grad_norm": 1.5408774614334106,
|
1176 |
+
"learning_rate": 3.5385928568879012e-06,
|
1177 |
+
"loss": 0.3169,
|
1178 |
+
"step": 167
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"epoch": 2.251256281407035,
|
1182 |
+
"grad_norm": 1.4838318824768066,
|
1183 |
+
"learning_rate": 3.4189145276196244e-06,
|
1184 |
+
"loss": 0.3287,
|
1185 |
+
"step": 168
|
1186 |
+
},
|
1187 |
+
{
|
1188 |
+
"epoch": 2.2646566164154103,
|
1189 |
+
"grad_norm": 1.4531508684158325,
|
1190 |
+
"learning_rate": 3.300876342821451e-06,
|
1191 |
+
"loss": 0.3165,
|
1192 |
+
"step": 169
|
1193 |
+
},
|
1194 |
+
{
|
1195 |
+
"epoch": 2.2780569514237854,
|
1196 |
+
"grad_norm": 1.4947620630264282,
|
1197 |
+
"learning_rate": 3.1845077200836638e-06,
|
1198 |
+
"loss": 0.3219,
|
1199 |
+
"step": 170
|
1200 |
+
},
|
1201 |
+
{
|
1202 |
+
"epoch": 2.291457286432161,
|
1203 |
+
"grad_norm": 1.5694292783737183,
|
1204 |
+
"learning_rate": 3.0698376609066828e-06,
|
1205 |
+
"loss": 0.3183,
|
1206 |
+
"step": 171
|
1207 |
+
},
|
1208 |
+
{
|
1209 |
+
"epoch": 2.304857621440536,
|
1210 |
+
"grad_norm": 1.5011779069900513,
|
1211 |
+
"learning_rate": 2.9568947434732777e-06,
|
1212 |
+
"loss": 0.3289,
|
1213 |
+
"step": 172
|
1214 |
+
},
|
1215 |
+
{
|
1216 |
+
"epoch": 2.318257956448911,
|
1217 |
+
"grad_norm": 1.5003248453140259,
|
1218 |
+
"learning_rate": 2.8457071155262885e-06,
|
1219 |
+
"loss": 0.3275,
|
1220 |
+
"step": 173
|
1221 |
+
},
|
1222 |
+
{
|
1223 |
+
"epoch": 2.3316582914572863,
|
1224 |
+
"grad_norm": 1.4831820726394653,
|
1225 |
+
"learning_rate": 2.7363024873536093e-06,
|
1226 |
+
"loss": 0.3302,
|
1227 |
+
"step": 174
|
1228 |
+
},
|
1229 |
+
{
|
1230 |
+
"epoch": 2.3450586264656614,
|
1231 |
+
"grad_norm": 1.4793733358383179,
|
1232 |
+
"learning_rate": 2.628708124882212e-06,
|
1233 |
+
"loss": 0.318,
|
1234 |
+
"step": 175
|
1235 |
+
},
|
1236 |
+
{
|
1237 |
+
"epoch": 2.358458961474037,
|
1238 |
+
"grad_norm": 1.4326947927474976,
|
1239 |
+
"learning_rate": 2.52295084288291e-06,
|
1240 |
+
"loss": 0.3256,
|
1241 |
+
"step": 176
|
1242 |
+
},
|
1243 |
+
{
|
1244 |
+
"epoch": 2.371859296482412,
|
1245 |
+
"grad_norm": 1.52131986618042,
|
1246 |
+
"learning_rate": 2.419056998287547e-06,
|
1247 |
+
"loss": 0.3293,
|
1248 |
+
"step": 177
|
1249 |
+
},
|
1250 |
+
{
|
1251 |
+
"epoch": 2.3852596314907872,
|
1252 |
+
"grad_norm": 1.5197522640228271,
|
1253 |
+
"learning_rate": 2.3170524836202936e-06,
|
1254 |
+
"loss": 0.3195,
|
1255 |
+
"step": 178
|
1256 |
+
},
|
1257 |
+
{
|
1258 |
+
"epoch": 2.3986599664991624,
|
1259 |
+
"grad_norm": 1.454834222793579,
|
1260 |
+
"learning_rate": 2.216962720544703e-06,
|
1261 |
+
"loss": 0.3236,
|
1262 |
+
"step": 179
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"epoch": 2.4120603015075375,
|
1266 |
+
"grad_norm": 1.5067466497421265,
|
1267 |
+
"learning_rate": 2.118812653528077e-06,
|
1268 |
+
"loss": 0.3225,
|
1269 |
+
"step": 180
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"epoch": 2.425460636515913,
|
1273 |
+
"grad_norm": 1.4823540449142456,
|
1274 |
+
"learning_rate": 2.022626743624807e-06,
|
1275 |
+
"loss": 0.3316,
|
1276 |
+
"step": 181
|
1277 |
+
},
|
1278 |
+
{
|
1279 |
+
"epoch": 2.438860971524288,
|
1280 |
+
"grad_norm": 1.4712008237838745,
|
1281 |
+
"learning_rate": 1.928428962380148e-06,
|
1282 |
+
"loss": 0.3193,
|
1283 |
+
"step": 182
|
1284 |
+
},
|
1285 |
+
{
|
1286 |
+
"epoch": 2.4522613065326633,
|
1287 |
+
"grad_norm": 1.5106829404830933,
|
1288 |
+
"learning_rate": 1.8362427858560094e-06,
|
1289 |
+
"loss": 0.3202,
|
1290 |
+
"step": 183
|
1291 |
+
},
|
1292 |
+
{
|
1293 |
+
"epoch": 2.4656616415410384,
|
1294 |
+
"grad_norm": 1.4007306098937988,
|
1295 |
+
"learning_rate": 1.74609118878024e-06,
|
1296 |
+
"loss": 0.3228,
|
1297 |
+
"step": 184
|
1298 |
+
},
|
1299 |
+
{
|
1300 |
+
"epoch": 2.4790619765494135,
|
1301 |
+
"grad_norm": 1.4667021036148071,
|
1302 |
+
"learning_rate": 1.6579966388208257e-06,
|
1303 |
+
"loss": 0.3135,
|
1304 |
+
"step": 185
|
1305 |
+
},
|
1306 |
+
{
|
1307 |
+
"epoch": 2.492462311557789,
|
1308 |
+
"grad_norm": 1.4928709268569946,
|
1309 |
+
"learning_rate": 1.5719810909864941e-06,
|
1310 |
+
"loss": 0.3283,
|
1311 |
+
"step": 186
|
1312 |
+
},
|
1313 |
+
{
|
1314 |
+
"epoch": 2.505862646566164,
|
1315 |
+
"grad_norm": 1.5440924167633057,
|
1316 |
+
"learning_rate": 1.4880659821550547e-06,
|
1317 |
+
"loss": 0.3304,
|
1318 |
+
"step": 187
|
1319 |
+
},
|
1320 |
+
{
|
1321 |
+
"epoch": 2.5192629815745393,
|
1322 |
+
"grad_norm": 1.4452801942825317,
|
1323 |
+
"learning_rate": 1.4062722257308803e-06,
|
1324 |
+
"loss": 0.3269,
|
1325 |
+
"step": 188
|
1326 |
+
},
|
1327 |
+
{
|
1328 |
+
"epoch": 2.5326633165829144,
|
1329 |
+
"grad_norm": 1.492119312286377,
|
1330 |
+
"learning_rate": 1.3266202064328548e-06,
|
1331 |
+
"loss": 0.3236,
|
1332 |
+
"step": 189
|
1333 |
+
},
|
1334 |
+
{
|
1335 |
+
"epoch": 2.5460636515912896,
|
1336 |
+
"grad_norm": 1.4364550113677979,
|
1337 |
+
"learning_rate": 1.249129775214064e-06,
|
1338 |
+
"loss": 0.328,
|
1339 |
+
"step": 190
|
1340 |
+
},
|
1341 |
+
{
|
1342 |
+
"epoch": 2.559463986599665,
|
1343 |
+
"grad_norm": 1.45607328414917,
|
1344 |
+
"learning_rate": 1.1738202443145307e-06,
|
1345 |
+
"loss": 0.3267,
|
1346 |
+
"step": 191
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 2.5728643216080402,
|
1350 |
+
"grad_norm": 1.5155068635940552,
|
1351 |
+
"learning_rate": 1.100710382448198e-06,
|
1352 |
+
"loss": 0.3242,
|
1353 |
+
"step": 192
|
1354 |
+
},
|
1355 |
+
{
|
1356 |
+
"epoch": 2.5862646566164154,
|
1357 |
+
"grad_norm": 1.454077124595642,
|
1358 |
+
"learning_rate": 1.029818410125365e-06,
|
1359 |
+
"loss": 0.3276,
|
1360 |
+
"step": 193
|
1361 |
+
},
|
1362 |
+
{
|
1363 |
+
"epoch": 2.5996649916247905,
|
1364 |
+
"grad_norm": 1.468336582183838,
|
1365 |
+
"learning_rate": 9.611619951117657e-07,
|
1366 |
+
"loss": 0.3259,
|
1367 |
+
"step": 194
|
1368 |
+
},
|
1369 |
+
{
|
1370 |
+
"epoch": 2.6130653266331656,
|
1371 |
+
"grad_norm": 1.480252981185913,
|
1372 |
+
"learning_rate": 8.94758248025378e-07,
|
1373 |
+
"loss": 0.3209,
|
1374 |
+
"step": 195
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"epoch": 2.626465661641541,
|
1378 |
+
"grad_norm": 1.5078731775283813,
|
1379 |
+
"learning_rate": 8.306237180721121e-07,
|
1380 |
+
"loss": 0.3227,
|
1381 |
+
"step": 196
|
1382 |
+
},
|
1383 |
+
{
|
1384 |
+
"epoch": 2.6398659966499163,
|
1385 |
+
"grad_norm": 1.4447978734970093,
|
1386 |
+
"learning_rate": 7.687743889213939e-07,
|
1387 |
+
"loss": 0.321,
|
1388 |
+
"step": 197
|
1389 |
+
},
|
1390 |
+
{
|
1391 |
+
"epoch": 2.6532663316582914,
|
1392 |
+
"grad_norm": 1.4051856994628906,
|
1393 |
+
"learning_rate": 7.092256747226944e-07,
|
1394 |
+
"loss": 0.3241,
|
1395 |
+
"step": 198
|
1396 |
+
},
|
1397 |
+
{
|
1398 |
+
"epoch": 2.6666666666666665,
|
1399 |
+
"grad_norm": 1.4788079261779785,
|
1400 |
+
"learning_rate": 6.519924162640168e-07,
|
1401 |
+
"loss": 0.3116,
|
1402 |
+
"step": 199
|
1403 |
+
},
|
1404 |
+
{
|
1405 |
+
"epoch": 2.6800670016750416,
|
1406 |
+
"grad_norm": 1.4269615411758423,
|
1407 |
+
"learning_rate": 5.970888772732453e-07,
|
1408 |
+
"loss": 0.3269,
|
1409 |
+
"step": 200
|
1410 |
+
},
|
1411 |
+
{
|
1412 |
+
"epoch": 2.6800670016750416,
|
1413 |
+
"eval_loss": 0.308817058801651,
|
1414 |
+
"eval_runtime": 36.2259,
|
1415 |
+
"eval_samples_per_second": 20.013,
|
1416 |
+
"eval_steps_per_second": 5.024,
|
1417 |
+
"step": 200
|
1418 |
+
},
|
1419 |
+
{
|
1420 |
+
"epoch": 2.693467336683417,
|
1421 |
+
"grad_norm": 1.4678142070770264,
|
1422 |
+
"learning_rate": 5.445287408633304e-07,
|
1423 |
+
"loss": 0.329,
|
1424 |
+
"step": 201
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 2.7068676716917923,
|
1428 |
+
"grad_norm": 1.43202805519104,
|
1429 |
+
"learning_rate": 4.943251061221721e-07,
|
1430 |
+
"loss": 0.3108,
|
1431 |
+
"step": 202
|
1432 |
+
},
|
1433 |
+
{
|
1434 |
+
"epoch": 2.7202680067001674,
|
1435 |
+
"grad_norm": 1.4377905130386353,
|
1436 |
+
"learning_rate": 4.464904848480522e-07,
|
1437 |
+
"loss": 0.3222,
|
1438 |
+
"step": 203
|
1439 |
+
},
|
1440 |
+
{
|
1441 |
+
"epoch": 2.7336683417085426,
|
1442 |
+
"grad_norm": 1.4890724420547485,
|
1443 |
+
"learning_rate": 4.0103679843142895e-07,
|
1444 |
+
"loss": 0.3232,
|
1445 |
+
"step": 204
|
1446 |
+
},
|
1447 |
+
{
|
1448 |
+
"epoch": 2.7470686767169177,
|
1449 |
+
"grad_norm": 1.4820594787597656,
|
1450 |
+
"learning_rate": 3.5797537488388326e-07,
|
1451 |
+
"loss": 0.3206,
|
1452 |
+
"step": 205
|
1453 |
+
},
|
1454 |
+
{
|
1455 |
+
"epoch": 2.7604690117252932,
|
1456 |
+
"grad_norm": 1.4427391290664673,
|
1457 |
+
"learning_rate": 3.1731694601492834e-07,
|
1458 |
+
"loss": 0.3237,
|
1459 |
+
"step": 206
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"epoch": 2.7738693467336684,
|
1463 |
+
"grad_norm": 1.4541987180709839,
|
1464 |
+
"learning_rate": 2.790716447574304e-07,
|
1465 |
+
"loss": 0.3205,
|
1466 |
+
"step": 207
|
1467 |
+
},
|
1468 |
+
{
|
1469 |
+
"epoch": 2.7872696817420435,
|
1470 |
+
"grad_norm": 1.379800796508789,
|
1471 |
+
"learning_rate": 2.4324900264226405e-07,
|
1472 |
+
"loss": 0.3193,
|
1473 |
+
"step": 208
|
1474 |
+
},
|
1475 |
+
{
|
1476 |
+
"epoch": 2.8006700167504186,
|
1477 |
+
"grad_norm": 1.3961976766586304,
|
1478 |
+
"learning_rate": 2.098579474228546e-07,
|
1479 |
+
"loss": 0.3125,
|
1480 |
+
"step": 209
|
1481 |
+
},
|
1482 |
+
{
|
1483 |
+
"epoch": 2.8140703517587937,
|
1484 |
+
"grad_norm": 1.44660484790802,
|
1485 |
+
"learning_rate": 1.7890680085019597e-07,
|
1486 |
+
"loss": 0.3225,
|
1487 |
+
"step": 210
|
1488 |
+
},
|
1489 |
+
{
|
1490 |
+
"epoch": 2.8274706867671693,
|
1491 |
+
"grad_norm": 1.3615787029266357,
|
1492 |
+
"learning_rate": 1.504032765988961e-07,
|
1493 |
+
"loss": 0.317,
|
1494 |
+
"step": 211
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 2.8408710217755444,
|
1498 |
+
"grad_norm": 1.464227557182312,
|
1499 |
+
"learning_rate": 1.2435447834476254e-07,
|
1500 |
+
"loss": 0.3436,
|
1501 |
+
"step": 212
|
1502 |
+
},
|
1503 |
+
{
|
1504 |
+
"epoch": 2.8542713567839195,
|
1505 |
+
"grad_norm": 1.4321690797805786,
|
1506 |
+
"learning_rate": 1.0076689799442874e-07,
|
1507 |
+
"loss": 0.3206,
|
1508 |
+
"step": 213
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 2.8676716917922946,
|
1512 |
+
"grad_norm": 1.3721367120742798,
|
1513 |
+
"learning_rate": 7.964641406742135e-08,
|
1514 |
+
"loss": 0.3242,
|
1515 |
+
"step": 214
|
1516 |
+
},
|
1517 |
+
{
|
1518 |
+
"epoch": 2.8810720268006698,
|
1519 |
+
"grad_norm": 1.4638272523880005,
|
1520 |
+
"learning_rate": 6.099829023112236e-08,
|
1521 |
+
"loss": 0.316,
|
1522 |
+
"step": 215
|
1523 |
+
},
|
1524 |
+
{
|
1525 |
+
"epoch": 2.8944723618090453,
|
1526 |
+
"grad_norm": 1.5022002458572388,
|
1527 |
+
"learning_rate": 4.482717398894165e-08,
|
1528 |
+
"loss": 0.3188,
|
1529 |
+
"step": 216
|
1530 |
+
},
|
1531 |
+
{
|
1532 |
+
"epoch": 2.9078726968174204,
|
1533 |
+
"grad_norm": 1.4071238040924072,
|
1534 |
+
"learning_rate": 3.1137095522068006e-08,
|
1535 |
+
"loss": 0.3229,
|
1536 |
+
"step": 217
|
1537 |
+
},
|
1538 |
+
{
|
1539 |
+
"epoch": 2.9212730318257956,
|
1540 |
+
"grad_norm": 1.4545257091522217,
|
1541 |
+
"learning_rate": 1.993146668506585e-08,
|
1542 |
+
"loss": 0.3103,
|
1543 |
+
"step": 218
|
1544 |
+
},
|
1545 |
+
{
|
1546 |
+
"epoch": 2.934673366834171,
|
1547 |
+
"grad_norm": 1.4042024612426758,
|
1548 |
+
"learning_rate": 1.1213080155564327e-08,
|
1549 |
+
"loss": 0.3234,
|
1550 |
+
"step": 219
|
1551 |
+
},
|
1552 |
+
{
|
1553 |
+
"epoch": 2.948073701842546,
|
1554 |
+
"grad_norm": 1.4457824230194092,
|
1555 |
+
"learning_rate": 4.984108738261828e-09,
|
1556 |
+
"loss": 0.3187,
|
1557 |
+
"step": 220
|
1558 |
+
},
|
1559 |
+
{
|
1560 |
+
"epoch": 2.9614740368509214,
|
1561 |
+
"grad_norm": 1.4061236381530762,
|
1562 |
+
"learning_rate": 1.246104823426908e-09,
|
1563 |
+
"loss": 0.3134,
|
1564 |
+
"step": 221
|
1565 |
+
},
|
1566 |
+
{
|
1567 |
+
"epoch": 2.9748743718592965,
|
1568 |
+
"grad_norm": 1.4718122482299805,
|
1569 |
+
"learning_rate": 0.0,
|
1570 |
+
"loss": 0.3219,
|
1571 |
+
"step": 222
|
1572 |
+
},
|
1573 |
+
{
|
1574 |
+
"epoch": 2.9748743718592965,
|
1575 |
+
"step": 222,
|
1576 |
+
"total_flos": 9.93148193773663e+18,
|
1577 |
+
"train_loss": 0.36201348014779994,
|
1578 |
+
"train_runtime": 28749.1045,
|
1579 |
+
"train_samples_per_second": 3.987,
|
1580 |
+
"train_steps_per_second": 0.008
|
1581 |
+
}
|
1582 |
+
],
|
1583 |
+
"logging_steps": 1.0,
|
1584 |
+
"max_steps": 222,
|
1585 |
+
"num_input_tokens_seen": 0,
|
1586 |
+
"num_train_epochs": 3,
|
1587 |
+
"save_steps": 500,
|
1588 |
+
"stateful_callbacks": {
|
1589 |
+
"TrainerControl": {
|
1590 |
+
"args": {
|
1591 |
+
"should_epoch_stop": false,
|
1592 |
+
"should_evaluate": false,
|
1593 |
+
"should_log": false,
|
1594 |
+
"should_save": true,
|
1595 |
+
"should_training_stop": true
|
1596 |
+
},
|
1597 |
+
"attributes": {}
|
1598 |
+
}
|
1599 |
+
},
|
1600 |
+
"total_flos": 9.93148193773663e+18,
|
1601 |
+
"train_batch_size": 2,
|
1602 |
+
"trial_name": null,
|
1603 |
+
"trial_params": null
|
1604 |
+
}
|
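Note: trainer_state.json's "log_history" is plain JSON, so the run above can be summarized without any Hugging Face tooling. A minimal sketch, assuming the file has been downloaded next to the script (the filename and printed fields mirror the state recorded above; nothing else is implied about the training code):

import json

# Load the trainer state saved alongside the checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training-step records carry "loss"; eval records carry "eval_loss" instead.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"steps logged: {len(train_log)}")
print(f"first loss: {train_log[0]['loss']:.4f} at step {train_log[0]['step']}")
print(f"last loss:  {train_log[-1]['loss']:.4f} at step {train_log[-1]['step']}")

# For this run there is a single eval record, at step 200 (eval_loss ~0.3088).
for e in eval_log:
    print(f"eval at step {e['step']}: eval_loss {e['eval_loss']:.4f}")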