|
Hi Piyali, I made a project in CCS. In main I read an input image and call my functions. Here is my linker command file (I use just one C66x core of the TDA2x; L2 is configured as L2_SRAM with no L2_CACHE, and I copy data to L2 by EDMA; L1 is configured as L1D_CACHE): -stack 0x3000 -heap 0x7000000 MEMORY { L1P_SRAM : origin = 0x00E00000, len = 0x8000 // L1D_SRAM : origin = 0x00F00000, len = 0x4000 /* 16 KB SRAM */ // L1D_CACHE : origin = 0x00F00000, len = 0x8000 /* 16 KB cache */ L1D_CACHE : origin = 0x00F04000, len = 0x4000 /* 16 KB cache */ L2_SRAM : origin = 0x00800000, len = 0x48000 /* SARAM in L2, = 256 + 32 - 128 = 160 KB*/ // L2_CACHE : origin = 0x00828000, len = 0x20000 /* Cache for L2, which is configured as 128 KB*/ DSP2_L2_SRAM : origin = 0x40800000, len = 0x48000 SL2_SRAM : origin = 0x5B000000, len = 0x40000 EXT_MEM_CACHE : origin = 0x80000000, len = 0x08000000 /* DSP Used cachable area */ } SECTIONS { vectors :> EXT_MEM_CACHE .cinit :> EXT_MEM_CACHE .cio :> EXT_MEM_CACHE .bss :> EXT_MEM_CACHE ////usually reserves space for uninitialized variables .text :> EXT_MEM_CACHE //////contains executable code .const :> EXT_MEM_CACHE .far :> EXT_MEM_CACHE .fardata :> EXT_MEM_CACHE /////usually contains initialized data .neardata :> EXT_MEM_CACHE ///////usually contains initialized data .rodata :> EXT_MEM_CACHE .sysmem :> EXT_MEM_CACHE .switch :> EXT_MEM_CACHE .L2SramSect :> L2_SRAM .stack :> L2_SRAM } #ifdef TI_DSP #define L2SRAM_SIZE (32*1024) #pragma DATA_SECTION (L2SRAM,".L2SramSect"); unsigned char L2SRAM[L2SRAM_SIZE]; #define L2TMP_SIZE (128*1024) #pragma DATA_SECTION (L2SRAM,".L2SramSect"); unsigned char L2TMP[L2TMP_SIZE]; #endif In main I malloc DDR space: unsigned char *DDR_Init = (unsigned char *)malloc(DDR_SIZE); I load my net into DDR_Init; the net is big: const float model_detection_global[] = { 0.0640991,-0.219671,0.00918405,0.164087,-0.00767695,0.142808,0.162083,0.145399,0.00453223,-1.25773, -0.0320548,0.161154,0.361267,1.714,0.197743,0.0784551,-0.0696055,-1.14334,-0.0957228,0.72948, 
-0.54699,0.508312,0.649834,-1.14706,0.405195,0.0574608,-0.567157,0.00642587,-0.0300398,-0.0680399, 0.0273895,-0.0263173,0.0327542,0.0387226,-0.00789206,-0.448488,-0.15092,0.0934956,1.32409,-0.244303, -1.82788,-0.4503,1.11225,0.178051,-0.0456231,0.0360474,-0.365231,-0.383318,0.00687606,0.00101387, -0.00782929,-0.00366878,0.00721993,-0.0535142,0.194668,0.524056,0.482211,-0.148617,0.136613,0.248017, -0.0469616,-0.86986,-0.494906,-0.0215785,0.0179301,-0.0402353,-0.157173,-0.284838,-0.247974,0.00699071, 0.048664,0.0686588,-0.403694,-0.0938685,0.0354045,0.42147,0.0852851,0.648701,0.03408,0.529695, 0.131282,0.284551,0.379638,0.142066,0.309521,0.347878,0.217491,0.219339,9.99999,0.465153, 0.334311,0.420712,0.74588,0.44612,1.1629,0.321116,0.423224,-0.422717,-0.0121956,0.0108994, -0.839461,0.0118817,-1.85014,0.187245,-0.0707816,0.474495,-0.945377,-0.809488,0.0574341,-1.04387, 0.114112,-0.110706,0.68189,-0.153875,-0.155772,-0.107221,-0.137917,-0.118754,-0.0194638,0.0568384, -0.0432765,0.0663145,0.09544 and so on } then call my funcs i Compile my project with CCS .out is 1.6MB here is the map TMS320C6x Linker PC v8.2.2 ****************************************************************************** >> Linked Fri Apr 13 16:57:12 2018 OUTPUT FILE NAME: <Drowsy_Detection_to_boader.out> ENTRY POINT SYMBOL: "_c_int00" address: 87156c60 MEMORY CONFIGURATION name origin length used unused attr fill ---------------------- -------- --------- -------- -------- ---- -------- L2_SRAM 00800000 00048000 0000b000 0003d000 RWIX L1P_SRAM 00e00000 00008000 00000000 00008000 RWIX L1D_CACHE 00f04000 00004000 00000000 00004000 RWIX DSP2_L2_SRAM 40800000 00048000 00000000 00048000 RWIX SL2_SRAM 5b000000 00040000 00000000 00040000 RWIX EXT_MEM_CACHE 80000000 08000000 071599d0 00ea6630 RWIX SEGMENT ALLOCATION MAP run origin load origin length init length attrs members ---------- ----------- ---------- ----------- ----- ------- 00800000 00800000 0000b000 00000000 rw- 00800000 00800000 00008000 00000000 
rw- .L2SramSect 00808000 00808000 00003000 00000000 rw- .stack 80000000 80000000 07000000 00000000 rw- 80000000 80000000 07000000 00000000 rw- .sysmem 87000000 87000000 0012577c 0012577c r-- 87000000 87000000 0012577c 0012577c r-- .const 8712577c 8712577c 00000004 00000000 rw- 8712577c 8712577c 00000004 00000000 rw- .neardata 87125780 87125780 000202d0 00000000 rw- 87125780 87125780 000202d0 00000000 rw- .far 87145a60 87145a60 00011a00 00011a00 r-x 87145a60 87145a60 00011a00 00011a00 r-x .text 87157460 87157460 00001448 00000000 rw- 87157460 87157460 00001324 00000000 rw- .fardata 87158788 87158788 00000120 00000000 rw- .cio 871588a8 871588a8 00001140 00001140 r-- 871588a8 871588a8 00000084 00000084 r-- .switch 87158930 87158930 000010b8 000010b8 r-- .cinit SECTION ALLOCATION MAP output attributes/ section page origin length input sections -------- ---- ---------- ---------- ---------------- .L2SramSect * 0 00800000 00008000 UNINITIALIZED 00800000 00008000 main.obj (.L2SramSect) .stack 0 00808000 00003000 UNINITIALIZED 00808000 00000008 rts6600_elf.lib : boot.obj (.stack) 00808008 00002ff8 --HOLE-- .const 0 87000000 0012577c 87000000 00124854 main.obj (.const:model_detection_global) 87124854 00000004 dma_funcs.obj (.const) 87124858 000007d0 main.obj (.const:$P$T0$1) 87125028 00000408 main.obj (.const:proto_detection_global) 87125430 0000013c afq_caffe_cnn.obj (.const:.string) 8712556c 00000004 --HOLE-- [fill = 0] 87125570 00000101 rts6600_elf.lib : ctype.obj (.const:.string:_ctypes_) 87125671 00000003 --HOLE-- [fill = 0] 87125674 00000044 afq_caffe_cnn.obj (.const) 871256b8 00000034 main.obj (.const:.string) 871256ec 00000024 rts6600_elf.lib : _printfi.obj (.const:.string) 87125710 00000024 edma_utils_autoincrement_v2.obj (.const) 87125734 0000001c edma_utils_autoincrement.obj (.const) 87125750 0000001c main.obj (.const) 8712576c 0000000c edma_utils.obj (.const) 87125778 00000004 edma_utils.obj (.const:edmaBase) .neardata * 0 8712577c 00000004 UNINITIALIZED 
8712577c 00000004 dma_resource_allocator.obj (.neardata) .far 0 87125780 000202d0 UNINITIALIZED 87125780 00020000 (.common:L2TMP) 87145780 00000140 (.common:__TI_tmpnams) 871458c0 000000c8 dma_resource_allocator.obj (.far:gDmaResourceState) 87145988 000000c0 (.common:gDmaUtilsStateStruct) 87145a48 00000008 (.common:parmbuf) .cio 0 87158788 00000120 UNINITIALIZED 87158788 00000120 rts6600_elf.lib : trgmsg.obj (.cio) .cinit 0 87158930 000010b8 87158930 00001068 (.cinit..fardata.load) [load image, compression = rle] 87159998 0000000d (.cinit..far.load) [load image, compression = rle] 871599a5 0000000b (.cinit..L2SramSect.load) [load image, compression = rle] 871599b0 00000009 (.cinit..neardata.load) [load image, compression = rle] 871599b9 00000003 --HOLE-- [fill = 0] 871599bc 00000008 (__TI_handler_table) 871599c4 00000004 --HOLE-- [fill = 0] 871599c8 00000020 (__TI_cinit_table) .text 0 87145a60 00011a00 87145a60 00006c00 afq_caffe_cnn.obj (.text) 8714c660 00001c40 rts6600_elf.lib : _printfi.obj (.text:__TI_printfi) 8714e2a0 000016c0 edma_utils_autoincrement.obj (.text) 8714f960 00000ac0 edma_utils_autoincrement_v2.obj (.text) 87150420 00000680 rts6600_elf.lib : _printfi.obj (.text:_pconv_a) 87150aa0 00000600 : divd.obj (.text:__c6xabi_divd) 871510a0 000005c0 : _printfi.obj (.text:_pconv_g) 87151660 00000460 : _printfi.obj (.text:_pconv_e) 87151ac0 00000440 : memory.obj (.text:free) 87151f00 000003e0 dma_resource_allocator.obj (.text) 871522e0 000003e0 rts6600_elf.lib : fputs.obj (.text:fputs) 871526c0 000002e0 dma_funcs.obj (.text) 871529a0 000002e0 rts6600_elf.lib : _printfi.obj (.text:fcvt) 87152c80 00000280 : divf.obj (.text:__c6xabi_divf) 87152f00 00000280 : _printfi.obj (.text:_pconv_f) 87153180 00000240 : imath64.obj (.text:__c6xabi_divull) 871533c0 00000240 : _printfi.obj (.text:_ltostr) 87153600 00000220 : fputc.obj (.text:fputc) 87153820 00000220 main.obj (.text) 87153a40 00000220 rts6600_elf.lib : open.obj (.text:open) 87153c60 00000220 : setvbuf.obj 
(.text:setvbuf) 87153e80 000001e0 : fopen.obj (.text:_openfile) 87154060 000001c0 : imath40.obj (.text:__c6xabi_divul) 87154220 000001a0 : hostrename.obj (.text:HOSTrename) 871543c0 000001a0 edma_utils_state.obj (.text) 87154560 00000180 rts6600_elf.lib : frexp.obj (.text:frexp) 871546e0 00000180 : memory.obj (.text:malloc) 87154860 00000160 : autoinit.obj (.text:_auto_init_elf) 871549c0 00000160 edma_utils_memcpy.obj (.text) 87154b20 00000160 rts6600_elf.lib : fopen.obj (.text:fopen) 87154c80 00000160 : sqrt.obj (.text:sqrt) 87154de0 00000140 : fclose.obj (.text:__TI_closefile) 87154f20 00000140 : getdevice.obj (.text:getdevice) 87155060 00000140 : ldexp.obj (.text:ldexp) 871551a0 00000120 : fseek.obj (.text:fseek) 871552c0 00000100 : copy_decompress_rle.obj (.text:__TI_decompress_rle_core) 871553c0 00000100 : _io_perm.obj (.text:__TI_wrt_ok) 871554c0 00000100 : atoi.obj (.text:atoi) 871555c0 00000100 : cpy_tbl.obj (.text:copy_in) 871556c0 00000100 : fprintf.obj (.text:fprintf) 871557c0 000000e0 : hostlseek.obj (.text:HOSTlseek) 871558a0 000000e0 : hostopen.obj (.text:HOSTopen) 87155980 000000e0 : hostwrite.obj (.text:HOSTwrite) 87155a60 000000e0 : divi.obj (.text:__divi) 87155b40 000000e0 : close.obj (.text:close) 87155c20 000000e0 : exit.obj (.text:exit) 87155d00 000000e0 : ltoa.obj (.text:ltoa) 87155de0 000000e0 : memset.obj (.text:memset) 87155ec0 000000e0 : printf.obj (.text:printf) 87155fa0 000000c0 : hostread.obj (.text:HOSTread) 87156060 000000c0 : hostunlink.obj (.text:HOSTunlink) 87156120 000000c0 : fflush.obj (.text:__TI_doflush) 871561e0 000000c0 : divu.obj (.text:__divu) 871562a0 000000c0 : remi.obj (.text:__remi) 87156360 000000c0 : _printfi.obj (.text:_ecpy) 87156420 000000c0 : memory.obj (.text:minsert) 871564e0 000000c0 : memory.obj (.text:mremove) 871565a0 000000c0 : tls.obj (.text:tls:init:__TI_tls_init) 87156660 000000a0 : hostclose.obj (.text:HOSTclose) 87156700 000000a0 : fopen.obj (.text:__TI_cleanup) 871567a0 000000a0 : fixfu.obj 
(.text:__c6xabi_fixfu) 87156840 000000a0 : fltulld.obj (.text:__c6xabi_fltulld) 871568e0 000000a0 : remu.obj (.text:__remu) 87156980 000000a0 : memory.obj (.text:_minit) 87156a20 000000a0 : memcpy64.obj (.text:memcpy) 87156ac0 000000a0 : modf.obj (.text:modf) 87156b60 00000080 : trgmsg.obj (.text:__TI_readmsg) 87156be0 00000080 : llshift.obj (.text:__c6xabi_llshl) 87156c60 00000080 : boot.obj (.text:_c_int00) 87156ce0 00000080 edma_utils.obj (.text) 87156d60 00000080 rts6600_elf.lib : fclose.obj (.text:fclose) 87156de0 00000080 : getdevice.obj (.text:finddevice) 87156e60 00000080 : unlink.obj (.text:unlink) 87156ee0 00000060 : frcmpyd_div.obj (.text:__TI_frcmpyd_div) 87156f40 00000060 : trgmsg.obj (.text:__TI_writemsg) 87156fa0 00000060 : llshift.obj (.text:__c6xabi_llshru) 87157000 00000060 : lseek.obj (.text:lseek) 87157060 00000060 : memccpy.obj (.text:memccpy) 871570c0 00000060 : write.obj (.text:write) 87157120 00000040 : isinf.obj (.text:__c6xabi_isinf) 87157160 00000040 : _printfi.obj (.text:__c6xabi_isnan) 871571a0 00000040 : call_stub.obj (.text:__call_stub) 871571e0 00000040 : strasg.obj (.text:__strasgi_64plus) 87157220 00000040 : args_main.obj (.text:_args_main) 87157260 00000040 : imath64.obj (.text:_subcull) 871572a0 00000020 : errno.obj (.text:__c6xabi_errno_addr) 871572c0 00000020 : push.obj (.text:__pop_rts) 871572e0 00000020 : push.obj (.text:__push_rts) 87157300 00000020 : _lock.obj (.text:_nop) 87157320 00000020 : fprintf.obj (.text:_outc) 87157340 00000020 : printf.obj (.text:_outc) 87157360 00000020 : fprintf.obj (.text:_outs) 87157380 00000020 : printf.obj (.text:_outs) 871573a0 00000020 : startup.obj (.text:_system_post_cinit) 871573c0 00000020 : pre_init.obj (.text:_system_pre_init) 871573e0 00000020 : exit.obj (.text:abort) 87157400 00000020 : copy_decompress_none.obj (.text:decompress:none:__TI_decompress_none) 87157420 00000020 : copy_decompress_rle.obj (.text:decompress:rle24:__TI_decompress_rle24) 87157440 00000020 edma3lld_support.obj 
(.text) .fardata 0 87157460 00001324 UNINITIALIZED 87157460 00001000 main.obj (.fardata:input_img) 87158460 000001e0 rts6600_elf.lib : defs.obj (.fardata:_ftable) 87158640 000000a0 : write.obj (.fardata:_stream) 871586e0 00000078 : write.obj (.fardata:_device) 87158758 0000000c : exit.obj (.fardata) 87158764 00000008 : memory.obj (.fardata) 8715876c 00000004 : _lock.obj (.fardata:_lock) 87158770 00000004 : _lock.obj (.fardata:_unlock) 87158774 00000004 : defs.obj (.fardata) 87158778 00000004 : errno.obj (.fardata) 8715877c 00000004 : fopen.obj (.fardata) 87158780 00000004 : open.obj (.fardata) .sysmem 0 80000000 07000000 UNINITIALIZED 80000000 00000008 rts6600_elf.lib : memory.obj (.sysmem) 80000008 06fffff8 --HOLE-- .switch 0 871588a8 00000084 871588a8 0000004c afq_caffe_cnn.obj (.switch:afq_caffecnn_load) 871588f4 00000038 rts6600_elf.lib : _printfi.obj (.switch:__TI_printfi) MODULE SUMMARY Module code ro data rw data ------ ---- ------- ------- .\ main.obj 544 1201276 167936 afq_caffe_cnn.obj 27648 460 0 edma_utils_autoincrement.obj 5824 28 0 edma_utils_autoincrement_v2.obj 2752 36 0 dma_resource_allocator.obj 992 0 204 dma_funcs.obj 736 4 0 edma_utils_state.obj 416 0 192 edma_utils_memcpy.obj 352 0 0 edma_utils.obj 128 16 0 edma3lld_support.obj 32 0 0 +--+---------------------------------+-------+---------+-----------+ Total: 39424 1201820 168332 D:\ti_ccsv7\ccsv7\tools\compiler\ti-cgt-c6000_8.2.2\lib\rts6600_elf.lib _printfi.obj 13696 92 0 memory.obj 2016 0 8 divd.obj 1536 0 0 fopen.obj 992 0 4 fputs.obj 992 0 0 defs.obj 0 0 804 divf.obj 640 0 0 imath64.obj 640 0 0 open.obj 544 0 4 fputc.obj 544 0 0 setvbuf.obj 544 0 0 trgmsg.obj 224 0 288 fclose.obj 448 0 0 getdevice.obj 448 0 0 imath40.obj 448 0 0 hostrename.obj 416 0 0 frexp.obj 384 0 0 write.obj 96 0 280 autoinit.obj 352 0 0 sqrt.obj 352 0 0 fprintf.obj 320 0 0 ldexp.obj 320 0 0 copy_decompress_rle.obj 288 0 0 fseek.obj 288 0 0 printf.obj 288 0 0 exit.obj 256 0 12 ctype.obj 0 257 0 _io_perm.obj 256 0 0 
atoi.obj 256 0 0 cpy_tbl.obj 256 0 0 hostopen.obj 224 0 8 close.obj 224 0 0 divi.obj 224 0 0 hostlseek.obj 224 0 0 hostwrite.obj 224 0 0 llshift.obj 224 0 0 ltoa.obj 224 0 0 memset.obj 224 0 0 divu.obj 192 0 0 fflush.obj 192 0 0 hostread.obj 192 0 0 hostunlink.obj 192 0 0 remi.obj 192 0 0 tls.obj 192 0 0 fixfu.obj 160 0 0 fltulld.obj 160 0 0 hostclose.obj 160 0 0 memcpy64.obj 160 0 0 modf.obj 160 0 0 remu.obj 160 0 0 boot.obj 128 0 0 unlink.obj 128 0 0 frcmpyd_div.obj 96 0 0 lseek.obj 96 0 0 memccpy.obj 96 0 0 args_main.obj 64 0 0 call_stub.obj 64 0 0 isinf.obj 64 0 0 push.obj 64 0 0 strasg.obj 64 0 0 _lock.obj 32 0 8 errno.obj 32 0 4 copy_decompress_none.obj 32 0 0 pre_init.obj 32 0 0 startup.obj 32 0 0 +--+---------------------------------+-------+---------+-----------+ Total: 32768 349 1420 Heap: 0 0 117440512 Stack: 0 0 12288 Linker Generated: 0 4273 0 +--+---------------------------------+-------+---------+-----------+ Grand Total: 72192 1206442 117622552 LINKER GENERATED COPY TABLES __TI_cinit_table @ 871599c8 records: 4, size/record: 8, table size: 32 .fardata: load addr=87158930, load size=00001068 bytes, run addr=87157460, run size=00001324 bytes, compression=rle .far: load addr=87159998, load size=0000000d bytes, run addr=87125780, run size=000202d0 bytes, compression=rle .L2SramSect: load addr=871599a5, load size=0000000b bytes, run addr=00800000, run size=00008000 bytes, compression=rle .neardata: load addr=871599b0, load size=00000009 bytes, run addr=8712577c, run size=00000004 bytes, compression=rle LINKER GENERATED HANDLER TABLE __TI_handler_table @ 871599bc records: 2, size/record: 4, table size: 8 index: 0, handler: __TI_decompress_rle24 index: 1, handler: __TI_decompress_none I load the .out onto the EVM with JTAG and run it; the following is the output timing: [C66xx_DSP1] Meanvalue_ti_times = 0.262565 ms im2col_ti_cov_times = 0.972233 ms mcvMatrixMulMatrixRowMajor_f32_afq_ti_cov_times = 12.518572 ms caffecnn_forward_layer_conv_ti_times = 13.755874 ms im2col_ti_cov_times = 
2.175635 ms mcvMatrixMulMatrixRowMajor_f32_afq_ti_cov_times = 25.113652 ms caffecnn_forward_layer_conv_ti_times = 27.531509 ms im2col_ti_cov_times = 2.222183 ms ... I think the output is right, but the time is very large; I don't think that is normal. Earlier I tested some of the functions alone in a simple project and the time was small (the input image size is the same): Meanvalue_ti_times + im2col_ti_cov_times + mcvMatrixMulMatrixRowMajor_f32_afq_ti_cov_times is 0.5 ms. So what should I check? |