diff --git "a/weight_sharing_model_4_of_4.serialized.json" "b/weight_sharing_model_4_of_4.serialized.json" new file mode 100644--- /dev/null +++ "b/weight_sharing_model_4_of_4.serialized.json" @@ -0,0 +1,1999 @@ +{ + "version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3", + "info": { + "backendId": 6, + "buildId": "v2.28.0.241029232508_102474", + "coreApiVersion": "2.21.0", + "backendApiVersion": "5.28.0", + "socVersion": "", + "contextBlobVersion": "3.2.0", + "contextBlobSize": 566494808, + "numContextTensors": 0, + "contextTensors": [], + "numGraphs": 2, + "graphs": [ + { + "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", + "info": { + "graphName": "ar128_cl4096_4_of_4", + "numGraphInputs": 20, + "graphInputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1, + "name": "past_key_24_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16835589706897736, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 6, + "name": "past_key_29_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17043833434581757, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 9, + "name": "past_value_29_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09825660288333893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16, + "name": "past_value_24_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07847104221582413, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 125, + "name": "past_key_30_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.2074936181306839, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 128, + "name": "past_value_30_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.10411321371793747, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 267, + "name": "past_key_31_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.18935787677764893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 270, + "name": "past_value_31_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.19053252041339875, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 441, + "name": "past_key_25_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1846073865890503, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 444, + "name": "past_value_25_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09766243398189545, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 583, + "name": "past_key_26_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15839417278766633, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 586, + "name": "past_value_26_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09959074854850769, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 731, + "name": "past_key_27_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15847137570381165, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 734, + "name": "past_value_27_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.08399864286184311, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 875, + "name": "past_key_28_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1828504055738449, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 878, + "name": "past_value_28_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09687352180480957, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1041, + "name": "_model_layers_23_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 128, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.191017746925354, + "offset": -41581 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1946, + "name": "position_ids_cos", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1948, + "name": "position_ids_sin", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 2653, + "name": "attention_mask", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 4096 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0007629510946571827, + "offset": -65535 + } + } + } + } + ], + "numGraphOutputs": 17, + "graphOutputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1913, + "name": "past_value_24_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07847104221582413, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 2556, + "name": "past_key_24_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16835589706897736, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 3676, + "name": "past_value_25_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09766243398189545, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 4317, + "name": "past_key_25_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1846073865890503, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 5436, + "name": "past_value_26_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09959074854850769, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 6077, + "name": "past_key_26_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15839417278766633, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 7196, + "name": "past_value_27_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.08399864286184311, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 7837, + "name": "past_key_27_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15847137570381165, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 8956, + "name": "past_value_28_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09687352180480957, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 9597, + "name": "past_key_28_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1828504055738449, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 10716, + "name": "past_value_29_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09825660288333893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 11357, + "name": "past_key_29_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17043833434581757, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 12476, + "name": "past_value_30_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.10411321371793747, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 13117, + "name": "past_key_30_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.2074936181306839, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 14236, + "name": "past_value_31_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.19053252041339875, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 14877, + "name": "past_key_31_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.18935787677764893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15140, + "name": "logits", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 128, + 32064 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.001873409142717719, + "offset": -26111 + } + } + } + } + ], + "numUpdateableTensors": 0, + "updateableTensors": [], + "graphBlobInfoSize": 40, + "graphBlobInfo": [ + { + "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", + "info": { + "spillFillBufferSize": 0, + "optimizationLevel": 3, + "vtcmSize": 8, + "htpDlbc": 0, + "numHvxThreads": 0 + } + } + ] + } + }, + { + "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", + "info": { + "graphName": "ar1_cl4096_4_of_4", + "numGraphInputs": 20, + "graphInputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15141, + "name": "past_key_24_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16835589706897736, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15146, + "name": "past_key_29_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17043833434581757, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15149, + "name": "past_value_29_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09825660288333893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15156, + "name": "past_value_24_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07847104221582413, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15265, + "name": "past_key_30_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.2074936181306839, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15268, + "name": "past_value_30_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.10411321371793747, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15407, + "name": "past_key_31_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.18935787677764893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15410, + "name": "past_value_31_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.19053252041339875, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15581, + "name": "past_key_25_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1846073865890503, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15584, + "name": "past_value_25_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09766243398189545, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15723, + "name": "past_key_26_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15839417278766633, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15726, + "name": "past_value_26_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09959074854850769, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15871, + "name": "past_key_27_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15847137570381165, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15874, + "name": "past_value_27_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.08399864286184311, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16015, + "name": "past_key_28_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1828504055738449, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16018, + "name": "past_value_28_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09687352180480957, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16181, + "name": "_model_layers_23_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 1, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.191017746925354, + "offset": -41581 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17086, + "name": "position_ids_cos", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17088, + "name": "position_ids_sin", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17793, + "name": "attention_mask", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 4096 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0007629510946571827, + "offset": -65535 + } + } + } + } + ], + "numGraphOutputs": 17, + "graphOutputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17053, + "name": "past_value_24_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07847104221582413, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17696, + "name": "past_key_24_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16835589706897736, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 18816, + "name": "past_value_25_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09766243398189545, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 19457, + "name": "past_key_25_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1846073865890503, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 20576, + "name": "past_value_26_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09959074854850769, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 21217, + "name": "past_key_26_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15839417278766633, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 22336, + "name": "past_value_27_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.08399864286184311, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 22977, + "name": "past_key_27_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15847137570381165, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 24096, + "name": "past_value_28_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09687352180480957, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 24737, + "name": "past_key_28_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1828504055738449, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 25856, + "name": "past_value_29_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.09825660288333893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 26497, + "name": "past_key_29_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17043833434581757, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 27616, + "name": "past_value_30_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.10411321371793747, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 28257, + "name": "past_key_30_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.2074936181306839, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 29376, + "name": "past_value_31_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.19053252041339875, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 30017, + "name": "past_key_31_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.18935787677764893, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 30280, + "name": "logits", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 1, + 32064 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.001873409142717719, + "offset": -26111 + } + } + } + } + ], + "numUpdateableTensors": 0, + "updateableTensors": [], + "graphBlobInfoSize": 40, + "graphBlobInfo": [ + { + "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", + "info": { + "spillFillBufferSize": 0, + "optimizationLevel": 3, + "vtcmSize": 8, + "htpDlbc": 0, + "numHvxThreads": 0 + } + } + ] + } + } + ], + "contextMetadataSize": 8, + "contextMetadata": { + "version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1", + "info": { + "dsp arch": 73 + } + }, + "soc model": 43 + } +}