// ffmpeg-cuda / libavfilter /vf_chromakey_cuda.ptx
// camenduru's picture
// thanks to ffmpeg ❤
// 8ead80b
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31833905
// Cuda compilation tools, release 11.8, V11.8.89
// Based on NVVM 7.0.1
//
.version 7.8
.target sm_60
.address_size 64
// .globl Process_uchar
// ---------------------------------------------------------------------------
// Process_uchar
//
// One thread handles one (x, y) position, where
//   x = ctaid.x * ntid.x + tid.x   and   y = ctaid.y * ntid.y + tid.y.
// The thread copies one byte from each of three textures into three output
// planes, computes an averaged distance over a 3x3 neighbourhood, maps that
// distance to a byte, and replicates the byte into a fourth output plane.
//
// Parameter roles, as established by how the code below uses them:
//   param_0 .. param_2   (u64) used as the texture operand of tex.2d
//   param_3 .. param_6   (u64) converted with cvta.to.global and written with
//                              st.global.u8
//   param_7,  param_8    (u32) x / y bounds for the param_3 and param_6 stores
//   param_9              (u32) row stride in bytes for param_3 and param_6
//   param_10, param_11   (u32) x / y bounds for the param_4 / param_5 stores
//                              and for the neighbourhood taps
//   param_12             (u32) row stride in bytes for param_4 and param_5
//   param_13, param_14   (f32) reference values subtracted from the two
//                              scaled samples of each tap
//   param_15             (f32) threshold applied to the averaged distance
//   param_16             (f32) ramp divisor; only used when > 0f38D1B717
//
// Float literals (IEEE-754 single, hex form):
//   0f437F0000 = 255.0      0f47FE0100 = 130050.0 (= 2 * 255 * 255)
//   0f41100000 = 9.0        0f38D1B717 ~= 1.0e-4
//
// NOTE(review): judging by the file name this is presumably a chroma-key
// filter (Y/U/V textures in, Y/U/V/alpha planes out) -- confirm against the
// host-side launch code; the PTX itself does not name the planes.
// ---------------------------------------------------------------------------
.visible .entry Process_uchar(
.param .u64 Process_uchar_param_0,
.param .u64 Process_uchar_param_1,
.param .u64 Process_uchar_param_2,
.param .u64 Process_uchar_param_3,
.param .u64 Process_uchar_param_4,
.param .u64 Process_uchar_param_5,
.param .u64 Process_uchar_param_6,
.param .u32 Process_uchar_param_7,
.param .u32 Process_uchar_param_8,
.param .u32 Process_uchar_param_9,
.param .u32 Process_uchar_param_10,
.param .u32 Process_uchar_param_11,
.param .u32 Process_uchar_param_12,
.param .f32 Process_uchar_param_13,
.param .f32 Process_uchar_param_14,
.param .f32 Process_uchar_param_15,
.param .f32 Process_uchar_param_16
)
{
.reg .pred %p<78>;
.reg .b16 %rs<13>;
.reg .f32 %f<350>;
.reg .b32 %r<114>;
.reg .b64 %rd<19>;
// Load all kernel parameters into registers.
ld.param.u64 %rd2, [Process_uchar_param_0];
ld.param.u64 %rd3, [Process_uchar_param_1];
ld.param.u64 %rd4, [Process_uchar_param_2];
ld.param.u64 %rd5, [Process_uchar_param_3];
ld.param.u64 %rd6, [Process_uchar_param_4];
ld.param.u64 %rd7, [Process_uchar_param_5];
ld.param.u64 %rd8, [Process_uchar_param_6];
ld.param.u32 %r51, [Process_uchar_param_7];
ld.param.u32 %r52, [Process_uchar_param_8];
ld.param.u32 %r53, [Process_uchar_param_9];
ld.param.u32 %r54, [Process_uchar_param_10];
ld.param.u32 %r55, [Process_uchar_param_11];
ld.param.u32 %r56, [Process_uchar_param_12];
ld.param.f32 %f42, [Process_uchar_param_13];
ld.param.f32 %f43, [Process_uchar_param_14];
ld.param.f32 %f44, [Process_uchar_param_15];
ld.param.f32 %f45, [Process_uchar_param_16];
// %r1 = x, %r2 = y (global thread coordinates).
mov.u32 %r57, %ntid.x;
mov.u32 %r58, %ctaid.x;
mov.u32 %r59, %tid.x;
mad.lo.s32 %r1, %r58, %r57, %r59;
mov.u32 %r60, %ntid.y;
mov.u32 %r61, %ctaid.y;
mov.u32 %r62, %tid.y;
mad.lo.s32 %r2, %r61, %r60, %r62;
// Exit when x >= param_7 or y >= param_8.
setp.ge.s32 %p2, %r2, %r52;
setp.ge.s32 %p3, %r1, %r51;
or.pred %p4, %p3, %p2;
@%p4 bra $L__BB0_49;
// Sample param_0 at (x, y); scale component 0 by 255, truncate toward zero,
// and store the low byte at param_3[y * param_9 + x].
cvta.to.global.u64 %rd9, %rd5;
cvt.rn.f32.s32 %f1, %r2;
cvt.rn.f32.s32 %f2, %r1;
tex.2d.v4.f32.f32 {%f46, %f47, %f48, %f49}, [%rd2, {%f2, %f1}];
mul.f32 %f50, %f46, 0f437F0000;
cvt.rzi.u32.f32 %r63, %f50;
mad.lo.s32 %r64, %r2, %r53, %r1;
cvt.s64.s32 %rd10, %r64;
add.s64 %rd11, %rd9, %rd10;
st.global.u8 [%rd11], %r63;
// Everything below only runs for x < param_10 and y < param_11.
setp.ge.s32 %p5, %r1, %r54;
setp.ge.s32 %p6, %r2, %r55;
or.pred %p7, %p5, %p6;
@%p7 bra $L__BB0_49;
// Sample param_1 at (x, y): component 0 * 255 -> param_4[y * param_12 + x].
tex.2d.v4.f32.f32 {%f51, %f52, %f53, %f54}, [%rd3, {%f2, %f1}];
mad.lo.s32 %r65, %r2, %r56, %r1;
mul.f32 %f55, %f51, 0f437F0000;
cvt.rzi.u32.f32 %r66, %f55;
cvt.s64.s32 %rd12, %r65;
cvta.to.global.u64 %rd13, %rd6;
add.s64 %rd14, %rd13, %rd12;
st.global.u8 [%rd14], %r66;
// Sample param_2 at (x, y): component 0 * 255 -> param_5[y * param_12 + x].
// NOTE(review): this fetch is issued unconditionally, although param_2 is
// compared against zero a few instructions further down.
tex.2d.v4.f32.f32 {%f56, %f57, %f58, %f59}, [%rd4, {%f2, %f1}];
mul.f32 %f60, %f56, 0f437F0000;
cvt.rzi.u32.f32 %r67, %f60;
cvta.to.global.u64 %rd15, %rd7;
add.s64 %rd16, %rd15, %rd12;
st.global.u8 [%rd16], %r67;
// %r3 = param_7 / param_10 (signed integer division); used as the
// replication factor for the final store loop.
div.s32 %r3, %r51, %r54;
// Neighbourhood setup: %r4 = x - 1, %r5 = y - 1.
// OR-ing two signed values and testing "< 0" detects whether either one is
// negative. %p9 (x > param_10) and %p11 (y > param_11) are reused by later
// taps; given the guard above, both are false on this path.
add.s32 %r4, %r1, -1;
cvt.rn.f32.s32 %f3, %r4;
add.s32 %r5, %r2, -1;
or.b32 %r68, %r5, %r4;
setp.lt.s32 %p8, %r68, 0;
setp.gt.s32 %p9, %r1, %r54;
or.pred %p10, %p9, %p8;
setp.gt.s32 %p11, %r2, %r55;
or.pred %p1, %p11, %p10;
// %f334 = running sum of distances, %r96 = number of taps accumulated.
mov.f32 %f334, 0f00000000;
mov.u32 %r96, 0;
// Two variants of the 3x3 pass:
//   param_2 != 0 : first value from param_1 component 0, second value from
//                  param_2 component 0 (falls through below)
//   param_2 == 0 : both values from param_1, components 0 and 1 ($L__BB0_21)
setp.eq.s64 %p12, %rd4, 0;
@%p12 bra $L__BB0_21;
// ---- Variant A (param_2 != 0) ----
// Every tap computes
//   d = sqrt(((a*255 - param_13)^2 + (b*255 - param_14)^2) / 130050)
// and adds it to %f334, incrementing %r96.
// Tap (x-1, y-1): skipped when %p1 is set.
@%p1 bra $L__BB0_5;
cvt.rn.f32.s32 %f62, %r5;
tex.2d.v4.f32.f32 {%f63, %f64, %f65, %f66}, [%rd3, {%f3, %f62}];
tex.2d.v4.f32.f32 {%f67, %f68, %f69, %f70}, [%rd4, {%f3, %f62}];
mul.f32 %f71, %f63, 0f437F0000;
sub.f32 %f72, %f71, %f42;
mul.f32 %f73, %f67, 0f437F0000;
sub.f32 %f74, %f73, %f43;
mul.f32 %f75, %f74, %f74;
fma.rn.f32 %f76, %f72, %f72, %f75;
div.rn.f32 %f77, %f76, 0f47FE0100;
sqrt.rn.f32 %f78, %f77;
add.f32 %f334, %f78, 0f00000000;
mov.u32 %r96, 1;
$L__BB0_5:
// Tap (x-1, y): skipped when x-1 or y is negative, or %p9.
or.b32 %r71, %r2, %r4;
setp.lt.s32 %p13, %r71, 0;
or.pred %p15, %p9, %p13;
@%p15 bra $L__BB0_7;
tex.2d.v4.f32.f32 {%f79, %f80, %f81, %f82}, [%rd3, {%f3, %f1}];
tex.2d.v4.f32.f32 {%f83, %f84, %f85, %f86}, [%rd4, {%f3, %f1}];
mul.f32 %f87, %f79, 0f437F0000;
sub.f32 %f88, %f87, %f42;
mul.f32 %f89, %f83, 0f437F0000;
sub.f32 %f90, %f89, %f43;
mul.f32 %f91, %f90, %f90;
fma.rn.f32 %f92, %f88, %f88, %f91;
div.rn.f32 %f93, %f92, 0f47FE0100;
sqrt.rn.f32 %f94, %f93;
add.f32 %f334, %f334, %f94;
add.s32 %r96, %r96, 1;
$L__BB0_7:
// Tap (x-1, y+1): %r9 = y + 1; skipped when y+1 >= param_11 (%p19),
// when x-1 or y+1 is negative, or %p9.
add.s32 %r9, %r2, 1;
or.b32 %r72, %r9, %r4;
setp.lt.s32 %p16, %r72, 0;
or.pred %p18, %p9, %p16;
setp.ge.s32 %p19, %r9, %r55;
or.pred %p20, %p19, %p18;
@%p20 bra $L__BB0_9;
cvt.rn.f32.s32 %f95, %r9;
tex.2d.v4.f32.f32 {%f96, %f97, %f98, %f99}, [%rd3, {%f3, %f95}];
tex.2d.v4.f32.f32 {%f100, %f101, %f102, %f103}, [%rd4, {%f3, %f95}];
mul.f32 %f104, %f96, 0f437F0000;
sub.f32 %f105, %f104, %f42;
mul.f32 %f106, %f100, 0f437F0000;
sub.f32 %f107, %f106, %f43;
mul.f32 %f108, %f107, %f107;
fma.rn.f32 %f109, %f105, %f105, %f108;
div.rn.f32 %f110, %f109, 0f47FE0100;
sqrt.rn.f32 %f111, %f110;
add.f32 %f334, %f334, %f111;
add.s32 %r96, %r96, 1;
$L__BB0_9:
// Tap (x, y-1): skipped when x or y-1 is negative, or %p11.
or.b32 %r73, %r5, %r1;
setp.lt.s32 %p21, %r73, 0;
or.pred %p23, %p11, %p21;
@%p23 bra $L__BB0_11;
cvt.rn.f32.s32 %f112, %r5;
tex.2d.v4.f32.f32 {%f113, %f114, %f115, %f116}, [%rd3, {%f2, %f112}];
tex.2d.v4.f32.f32 {%f117, %f118, %f119, %f120}, [%rd4, {%f2, %f112}];
mul.f32 %f121, %f113, 0f437F0000;
sub.f32 %f122, %f121, %f42;
mul.f32 %f123, %f117, 0f437F0000;
sub.f32 %f124, %f123, %f43;
mul.f32 %f125, %f124, %f124;
fma.rn.f32 %f126, %f122, %f122, %f125;
div.rn.f32 %f127, %f126, 0f47FE0100;
sqrt.rn.f32 %f128, %f127;
add.f32 %f334, %f334, %f128;
add.s32 %r96, %r96, 1;
$L__BB0_11:
// Tap (x, y), the centre: skipped only when x or y is negative.
or.b32 %r74, %r2, %r1;
setp.lt.s32 %p24, %r74, 0;
@%p24 bra $L__BB0_13;
tex.2d.v4.f32.f32 {%f129, %f130, %f131, %f132}, [%rd3, {%f2, %f1}];
tex.2d.v4.f32.f32 {%f133, %f134, %f135, %f136}, [%rd4, {%f2, %f1}];
mul.f32 %f137, %f129, 0f437F0000;
sub.f32 %f138, %f137, %f42;
mul.f32 %f139, %f133, 0f437F0000;
sub.f32 %f140, %f139, %f43;
mul.f32 %f141, %f140, %f140;
fma.rn.f32 %f142, %f138, %f138, %f141;
div.rn.f32 %f143, %f142, 0f47FE0100;
sqrt.rn.f32 %f144, %f143;
add.f32 %f334, %f334, %f144;
add.s32 %r96, %r96, 1;
$L__BB0_13:
// Tap (x, y+1): skipped when y+1 >= param_11, or x or y+1 is negative.
or.b32 %r75, %r9, %r1;
setp.lt.s32 %p25, %r75, 0;
or.pred %p27, %p19, %p25;
@%p27 bra $L__BB0_15;
cvt.rn.f32.s32 %f145, %r9;
tex.2d.v4.f32.f32 {%f146, %f147, %f148, %f149}, [%rd3, {%f2, %f145}];
tex.2d.v4.f32.f32 {%f150, %f151, %f152, %f153}, [%rd4, {%f2, %f145}];
mul.f32 %f154, %f146, 0f437F0000;
sub.f32 %f155, %f154, %f42;
mul.f32 %f156, %f150, 0f437F0000;
sub.f32 %f157, %f156, %f43;
mul.f32 %f158, %f157, %f157;
fma.rn.f32 %f159, %f155, %f155, %f158;
div.rn.f32 %f160, %f159, 0f47FE0100;
sqrt.rn.f32 %f161, %f160;
add.f32 %f334, %f334, %f161;
add.s32 %r96, %r96, 1;
$L__BB0_15:
// Tap (x+1, y-1): %r18 = x + 1; skipped when x+1 >= param_10 (%p28),
// when x+1 or y-1 is negative, or %p11.
add.s32 %r18, %r1, 1;
setp.ge.s32 %p28, %r18, %r54;
cvt.rn.f32.s32 %f16, %r18;
or.b32 %r76, %r5, %r18;
setp.lt.s32 %p29, %r76, 0;
or.pred %p30, %p28, %p29;
or.pred %p32, %p11, %p30;
@%p32 bra $L__BB0_17;
cvt.rn.f32.s32 %f162, %r5;
tex.2d.v4.f32.f32 {%f163, %f164, %f165, %f166}, [%rd3, {%f16, %f162}];
tex.2d.v4.f32.f32 {%f167, %f168, %f169, %f170}, [%rd4, {%f16, %f162}];
mul.f32 %f171, %f163, 0f437F0000;
mul.f32 %f172, %f167, 0f437F0000;
sub.f32 %f173, %f171, %f42;
sub.f32 %f174, %f172, %f43;
mul.f32 %f175, %f174, %f174;
fma.rn.f32 %f176, %f173, %f173, %f175;
div.rn.f32 %f177, %f176, 0f47FE0100;
sqrt.rn.f32 %f178, %f177;
add.f32 %f334, %f334, %f178;
add.s32 %r96, %r96, 1;
$L__BB0_17:
// Tap (x+1, y): skipped when x+1 >= param_10, or x+1 or y is negative.
or.b32 %r77, %r2, %r18;
setp.lt.s32 %p33, %r77, 0;
or.pred %p35, %p28, %p33;
@%p35 bra $L__BB0_19;
tex.2d.v4.f32.f32 {%f179, %f180, %f181, %f182}, [%rd3, {%f16, %f1}];
tex.2d.v4.f32.f32 {%f183, %f184, %f185, %f186}, [%rd4, {%f16, %f1}];
mul.f32 %f187, %f179, 0f437F0000;
mul.f32 %f188, %f183, 0f437F0000;
sub.f32 %f189, %f187, %f42;
sub.f32 %f190, %f188, %f43;
mul.f32 %f191, %f190, %f190;
fma.rn.f32 %f192, %f189, %f189, %f191;
div.rn.f32 %f193, %f192, 0f47FE0100;
sqrt.rn.f32 %f194, %f193;
add.f32 %f334, %f334, %f194;
add.s32 %r96, %r96, 1;
$L__BB0_19:
// Tap (x+1, y+1): skipped when x+1 >= param_10, y+1 >= param_11, or
// either coordinate is negative. Afterwards join the common tail.
or.b32 %r78, %r9, %r18;
setp.lt.s32 %p36, %r78, 0;
or.pred %p38, %p28, %p36;
or.pred %p40, %p19, %p38;
@%p40 bra $L__BB0_39;
cvt.rn.f32.s32 %f195, %r9;
tex.2d.v4.f32.f32 {%f196, %f197, %f198, %f199}, [%rd3, {%f16, %f195}];
tex.2d.v4.f32.f32 {%f200, %f201, %f202, %f203}, [%rd4, {%f16, %f195}];
mul.f32 %f204, %f196, 0f437F0000;
mul.f32 %f205, %f200, 0f437F0000;
sub.f32 %f206, %f204, %f42;
sub.f32 %f207, %f205, %f43;
mul.f32 %f208, %f207, %f207;
fma.rn.f32 %f209, %f206, %f206, %f208;
div.rn.f32 %f210, %f209, 0f47FE0100;
sqrt.rn.f32 %f211, %f210;
add.f32 %f334, %f334, %f211;
add.s32 %r96, %r96, 1;
bra.uni $L__BB0_39;
$L__BB0_21:
// ---- Variant B (param_2 == 0) ----
// Same nine taps and the same skip conditions as variant A, but both values
// of each tap come from a single fetch of param_1 (components 0 and 1).
// Tap (x-1, y-1).
@%p1 bra $L__BB0_23;
cvt.rn.f32.s32 %f213, %r5;
tex.2d.v4.f32.f32 {%f214, %f215, %f216, %f217}, [%rd3, {%f3, %f213}];
mul.f32 %f218, %f214, 0f437F0000;
sub.f32 %f219, %f218, %f42;
mul.f32 %f220, %f215, 0f437F0000;
sub.f32 %f221, %f220, %f43;
mul.f32 %f222, %f221, %f221;
fma.rn.f32 %f223, %f219, %f219, %f222;
div.rn.f32 %f224, %f223, 0f47FE0100;
sqrt.rn.f32 %f225, %f224;
add.f32 %f334, %f225, 0f00000000;
mov.u32 %r96, 1;
$L__BB0_23:
// Tap (x-1, y).
or.b32 %r81, %r2, %r4;
setp.lt.s32 %p41, %r81, 0;
or.pred %p43, %p9, %p41;
@%p43 bra $L__BB0_25;
tex.2d.v4.f32.f32 {%f226, %f227, %f228, %f229}, [%rd3, {%f3, %f1}];
mul.f32 %f230, %f226, 0f437F0000;
sub.f32 %f231, %f230, %f42;
mul.f32 %f232, %f227, 0f437F0000;
sub.f32 %f233, %f232, %f43;
mul.f32 %f234, %f233, %f233;
fma.rn.f32 %f235, %f231, %f231, %f234;
div.rn.f32 %f236, %f235, 0f47FE0100;
sqrt.rn.f32 %f237, %f236;
add.f32 %f334, %f334, %f237;
add.s32 %r96, %r96, 1;
$L__BB0_25:
// Tap (x-1, y+1): %r27 = y + 1, %p47 = (y+1 >= param_11).
add.s32 %r27, %r2, 1;
or.b32 %r82, %r27, %r4;
setp.lt.s32 %p44, %r82, 0;
or.pred %p46, %p9, %p44;
setp.ge.s32 %p47, %r27, %r55;
or.pred %p48, %p47, %p46;
@%p48 bra $L__BB0_27;
cvt.rn.f32.s32 %f238, %r27;
tex.2d.v4.f32.f32 {%f239, %f240, %f241, %f242}, [%rd3, {%f3, %f238}];
mul.f32 %f243, %f239, 0f437F0000;
sub.f32 %f244, %f243, %f42;
mul.f32 %f245, %f240, 0f437F0000;
sub.f32 %f246, %f245, %f43;
mul.f32 %f247, %f246, %f246;
fma.rn.f32 %f248, %f244, %f244, %f247;
div.rn.f32 %f249, %f248, 0f47FE0100;
sqrt.rn.f32 %f250, %f249;
add.f32 %f334, %f334, %f250;
add.s32 %r96, %r96, 1;
$L__BB0_27:
// Tap (x, y-1).
or.b32 %r83, %r5, %r1;
setp.lt.s32 %p49, %r83, 0;
or.pred %p51, %p11, %p49;
@%p51 bra $L__BB0_29;
cvt.rn.f32.s32 %f251, %r5;
tex.2d.v4.f32.f32 {%f252, %f253, %f254, %f255}, [%rd3, {%f2, %f251}];
mul.f32 %f256, %f252, 0f437F0000;
sub.f32 %f257, %f256, %f42;
mul.f32 %f258, %f253, 0f437F0000;
sub.f32 %f259, %f258, %f43;
mul.f32 %f260, %f259, %f259;
fma.rn.f32 %f261, %f257, %f257, %f260;
div.rn.f32 %f262, %f261, 0f47FE0100;
sqrt.rn.f32 %f263, %f262;
add.f32 %f334, %f334, %f263;
add.s32 %r96, %r96, 1;
$L__BB0_29:
// Tap (x, y), the centre.
or.b32 %r84, %r2, %r1;
setp.lt.s32 %p52, %r84, 0;
@%p52 bra $L__BB0_31;
tex.2d.v4.f32.f32 {%f264, %f265, %f266, %f267}, [%rd3, {%f2, %f1}];
mul.f32 %f268, %f264, 0f437F0000;
sub.f32 %f269, %f268, %f42;
mul.f32 %f270, %f265, 0f437F0000;
sub.f32 %f271, %f270, %f43;
mul.f32 %f272, %f271, %f271;
fma.rn.f32 %f273, %f269, %f269, %f272;
div.rn.f32 %f274, %f273, 0f47FE0100;
sqrt.rn.f32 %f275, %f274;
add.f32 %f334, %f334, %f275;
add.s32 %r96, %r96, 1;
$L__BB0_31:
// Tap (x, y+1).
or.b32 %r85, %r27, %r1;
setp.lt.s32 %p53, %r85, 0;
or.pred %p55, %p47, %p53;
@%p55 bra $L__BB0_33;
cvt.rn.f32.s32 %f276, %r27;
tex.2d.v4.f32.f32 {%f277, %f278, %f279, %f280}, [%rd3, {%f2, %f276}];
mul.f32 %f281, %f277, 0f437F0000;
sub.f32 %f282, %f281, %f42;
mul.f32 %f283, %f278, 0f437F0000;
sub.f32 %f284, %f283, %f43;
mul.f32 %f285, %f284, %f284;
fma.rn.f32 %f286, %f282, %f282, %f285;
div.rn.f32 %f287, %f286, 0f47FE0100;
sqrt.rn.f32 %f288, %f287;
add.f32 %f334, %f334, %f288;
add.s32 %r96, %r96, 1;
$L__BB0_33:
// Tap (x+1, y-1): %r36 = x + 1, %p56 = (x+1 >= param_10).
add.s32 %r36, %r1, 1;
setp.ge.s32 %p56, %r36, %r54;
cvt.rn.f32.s32 %f34, %r36;
or.b32 %r86, %r5, %r36;
setp.lt.s32 %p57, %r86, 0;
or.pred %p58, %p56, %p57;
or.pred %p60, %p11, %p58;
@%p60 bra $L__BB0_35;
cvt.rn.f32.s32 %f289, %r5;
tex.2d.v4.f32.f32 {%f290, %f291, %f292, %f293}, [%rd3, {%f34, %f289}];
mul.f32 %f294, %f290, 0f437F0000;
mul.f32 %f295, %f291, 0f437F0000;
sub.f32 %f296, %f294, %f42;
sub.f32 %f297, %f295, %f43;
mul.f32 %f298, %f297, %f297;
fma.rn.f32 %f299, %f296, %f296, %f298;
div.rn.f32 %f300, %f299, 0f47FE0100;
sqrt.rn.f32 %f301, %f300;
add.f32 %f334, %f334, %f301;
add.s32 %r96, %r96, 1;
$L__BB0_35:
// Tap (x+1, y).
or.b32 %r87, %r2, %r36;
setp.lt.s32 %p61, %r87, 0;
or.pred %p63, %p56, %p61;
@%p63 bra $L__BB0_37;
tex.2d.v4.f32.f32 {%f302, %f303, %f304, %f305}, [%rd3, {%f34, %f1}];
mul.f32 %f306, %f302, 0f437F0000;
mul.f32 %f307, %f303, 0f437F0000;
sub.f32 %f308, %f306, %f42;
sub.f32 %f309, %f307, %f43;
mul.f32 %f310, %f309, %f309;
fma.rn.f32 %f311, %f308, %f308, %f310;
div.rn.f32 %f312, %f311, 0f47FE0100;
sqrt.rn.f32 %f313, %f312;
add.f32 %f334, %f334, %f313;
add.s32 %r96, %r96, 1;
$L__BB0_37:
// Tap (x+1, y+1).
or.b32 %r88, %r27, %r36;
setp.lt.s32 %p64, %r88, 0;
or.pred %p66, %p56, %p64;
or.pred %p68, %p47, %p66;
@%p68 bra $L__BB0_39;
cvt.rn.f32.s32 %f314, %r27;
tex.2d.v4.f32.f32 {%f315, %f316, %f317, %f318}, [%rd3, {%f34, %f314}];
mul.f32 %f319, %f315, 0f437F0000;
mul.f32 %f320, %f316, 0f437F0000;
sub.f32 %f321, %f319, %f42;
sub.f32 %f322, %f320, %f43;
mul.f32 %f323, %f322, %f322;
fma.rn.f32 %f324, %f321, %f321, %f323;
div.rn.f32 %f325, %f324, 0f47FE0100;
sqrt.rn.f32 %f326, %f325;
add.f32 %f334, %f334, %f326;
add.s32 %r96, %r96, 1;
$L__BB0_39:
// Common tail: %f41 = sum / count, with the divisor forced to 9.0 when no
// tap was accumulated (count <= 0).
cvt.rn.f32.s32 %f327, %r96;
setp.gt.s32 %p69, %r96, 0;
selp.f32 %f328, %f327, 0f41100000, %p69;
div.rn.f32 %f41, %f334, %f328;
// Choose ramp or hard threshold depending on param_16 > ~1e-4.
setp.gt.f32 %p70, %f45, 0f38D1B717;
@%p70 bra $L__BB0_41;
bra.uni $L__BB0_40;
$L__BB0_41:
// Ramp: %rs10 = trunc(clamp((avg - param_15) / param_16, 0, 1) * 255).
sub.f32 %f329, %f41, %f44;
div.rn.f32 %f330, %f329, %f45;
cvt.sat.f32.f32 %f331, %f330;
mul.f32 %f332, %f331, 0f437F0000;
cvt.rzi.u32.f32 %r89, %f332;
cvt.u16.u32 %rs10, %r89;
bra.uni $L__BB0_42;
$L__BB0_40:
// Hard threshold: %rs10 = all ones when avg >= param_15 (setp.geu is also
// true for unordered/NaN operands), otherwise 0. Only the low byte is
// stored later, i.e. 0xFF or 0x00.
setp.geu.f32 %p71, %f41, %f44;
selp.b16 %rs10, -1, 0, %p71;
$L__BB0_42:
// Nothing to write when the replication factor %r3 is < 1.
setp.lt.s32 %p72, %r3, 1;
@%p72 bra $L__BB0_49;
// Replicate %rs10 into a %r3 x %r3 square of param_6 whose origin is
// (%r3 * x, %r3 * y); %r112 / %r113 are the column / row offsets.
// NOTE(review): both counters are kept in 16-bit registers and masked to
// 8 bits (and ..., 255) before being compared with %r3, so they wrap at 256;
// a factor above 255 could never satisfy the exit condition.
mul.lo.s32 %r43, %r3, %r1;
mul.lo.s32 %r44, %r3, %r2;
cvta.to.global.u64 %rd1, %rd8;
mov.u16 %rs8, 0;
mov.u32 %r90, 0;
mov.u32 %r112, %r90;
mov.u16 %rs11, %rs8;
$L__BB0_44:
// Outer loop: %r46 = destination column; reset the inner counter.
add.s32 %r46, %r112, %r43;
mov.u32 %r113, %r90;
mov.u16 %rs12, %rs8;
$L__BB0_45:
// Inner loop: %r48 = destination row; store only when the column is below
// param_7 and the row is below param_8.
add.s32 %r48, %r113, %r44;
setp.ge.s32 %p73, %r48, %r52;
setp.ge.s32 %p74, %r46, %r51;
or.pred %p75, %p74, %p73;
@%p75 bra $L__BB0_47;
// param_6[row * param_9 + column] = low byte of %rs10.
mad.lo.s32 %r92, %r48, %r53, %r46;
cvt.s64.s32 %rd17, %r92;
add.s64 %rd18, %rd1, %rd17;
st.global.u8 [%rd18], %rs10;
$L__BB0_47:
// Advance the inner (row) counter; repeat while %r3 > counter.
add.s16 %rs12, %rs12, 1;
cvt.u32.u16 %r93, %rs12;
and.b32 %r113, %r93, 255;
setp.gt.s32 %p76, %r3, %r113;
@%p76 bra $L__BB0_45;
// Advance the outer (column) counter; repeat while %r3 > counter.
add.s16 %rs11, %rs11, 1;
cvt.u32.u16 %r94, %rs11;
and.b32 %r112, %r94, 255;
setp.gt.s32 %p77, %r3, %r112;
@%p77 bra $L__BB0_44;
$L__BB0_49:
// Common exit for all early-outs and the normal path.
ret;
}
// .globl Process_uchar2
// ---------------------------------------------------------------------------
// Process_uchar2
//
// Same parameter list and overall flow as the single-channel kernel in this
// file, but both secondary values are always read from ONE texture
// (param_1, components 0 and 1). param_2 is declared in the signature and
// never loaded by this kernel.
//
// One thread handles one (x, y) position, where
//   x = ctaid.x * ntid.x + tid.x   and   y = ctaid.y * ntid.y + tid.y.
//
// Parameter roles, as established by how the code below uses them:
//   param_0, param_1     (u64) used as the texture operand of tex.2d
//   param_2              (u64) unused here
//   param_3 .. param_6   (u64) converted with cvta.to.global and written with
//                              st.global.u8
//   param_7,  param_8    (u32) x / y bounds for the param_3 and param_6 stores
//   param_9              (u32) row stride in bytes for param_3 and param_6
//   param_10, param_11   (u32) x / y bounds for the param_4 / param_5 stores
//                              and for the neighbourhood taps
//   param_12             (u32) row stride in bytes for param_4 and param_5
//   param_13, param_14   (f32) reference values subtracted from the two
//                              scaled samples of each tap
//   param_15             (f32) threshold applied to the averaged distance
//   param_16             (f32) ramp divisor; only used when > 0f38D1B717
//
// Float literals (IEEE-754 single, hex form):
//   0f437F0000 = 255.0      0f47FE0100 = 130050.0 (= 2 * 255 * 255)
//   0f41100000 = 9.0        0f38D1B717 ~= 1.0e-4
//
// NOTE(review): presumably the variant for an interleaved two-channel
// texture (e.g. an NV12-style UV plane) -- confirm against the host-side
// launch code; the PTX itself does not name the planes.
// ---------------------------------------------------------------------------
.visible .entry Process_uchar2(
.param .u64 Process_uchar2_param_0,
.param .u64 Process_uchar2_param_1,
.param .u64 Process_uchar2_param_2,
.param .u64 Process_uchar2_param_3,
.param .u64 Process_uchar2_param_4,
.param .u64 Process_uchar2_param_5,
.param .u64 Process_uchar2_param_6,
.param .u32 Process_uchar2_param_7,
.param .u32 Process_uchar2_param_8,
.param .u32 Process_uchar2_param_9,
.param .u32 Process_uchar2_param_10,
.param .u32 Process_uchar2_param_11,
.param .u32 Process_uchar2_param_12,
.param .f32 Process_uchar2_param_13,
.param .f32 Process_uchar2_param_14,
.param .f32 Process_uchar2_param_15,
.param .f32 Process_uchar2_param_16
)
{
.reg .pred %p<49>;
.reg .b16 %rs<13>;
.reg .f32 %f<169>;
.reg .b32 %r<78>;
.reg .b64 %rd<18>;
// Load kernel parameters (param_2 is skipped).
ld.param.u64 %rd2, [Process_uchar2_param_0];
ld.param.u64 %rd3, [Process_uchar2_param_1];
ld.param.u64 %rd4, [Process_uchar2_param_3];
ld.param.u64 %rd5, [Process_uchar2_param_4];
ld.param.u64 %rd6, [Process_uchar2_param_5];
ld.param.u64 %rd7, [Process_uchar2_param_6];
ld.param.u32 %r33, [Process_uchar2_param_7];
ld.param.u32 %r34, [Process_uchar2_param_8];
ld.param.u32 %r35, [Process_uchar2_param_9];
ld.param.u32 %r36, [Process_uchar2_param_10];
ld.param.u32 %r37, [Process_uchar2_param_11];
ld.param.u32 %r38, [Process_uchar2_param_12];
ld.param.f32 %f24, [Process_uchar2_param_13];
ld.param.f32 %f25, [Process_uchar2_param_14];
ld.param.f32 %f26, [Process_uchar2_param_15];
ld.param.f32 %f27, [Process_uchar2_param_16];
// %r1 = x, %r2 = y (global thread coordinates).
mov.u32 %r39, %ntid.x;
mov.u32 %r40, %ctaid.x;
mov.u32 %r41, %tid.x;
mad.lo.s32 %r1, %r40, %r39, %r41;
mov.u32 %r42, %ntid.y;
mov.u32 %r43, %ctaid.y;
mov.u32 %r44, %tid.y;
mad.lo.s32 %r2, %r43, %r42, %r44;
// Exit when x >= param_7 or y >= param_8.
setp.ge.s32 %p1, %r2, %r34;
setp.ge.s32 %p2, %r1, %r33;
or.pred %p3, %p2, %p1;
@%p3 bra $L__BB1_30;
// Sample param_0 at (x, y); scale component 0 by 255, truncate toward zero,
// and store the low byte at param_3[y * param_9 + x].
cvta.to.global.u64 %rd8, %rd4;
cvt.rn.f32.s32 %f1, %r2;
cvt.rn.f32.s32 %f2, %r1;
tex.2d.v4.f32.f32 {%f28, %f29, %f30, %f31}, [%rd2, {%f2, %f1}];
mul.f32 %f32, %f28, 0f437F0000;
cvt.rzi.u32.f32 %r45, %f32;
mad.lo.s32 %r46, %r2, %r35, %r1;
cvt.s64.s32 %rd9, %r46;
add.s64 %rd10, %rd8, %rd9;
st.global.u8 [%rd10], %r45;
// Everything below only runs for x < param_10 and y < param_11.
setp.ge.s32 %p4, %r1, %r36;
setp.ge.s32 %p5, %r2, %r37;
or.pred %p6, %p4, %p5;
@%p6 bra $L__BB1_30;
// One fetch of param_1 at (x, y) feeds both stores:
//   component 0 * 255 -> param_4[y * param_12 + x]
//   component 1 * 255 -> param_5[y * param_12 + x]
mad.lo.s32 %r48, %r2, %r38, %r1;
tex.2d.v4.f32.f32 {%f34, %f35, %f36, %f37}, [%rd3, {%f2, %f1}];
mul.f32 %f38, %f34, 0f437F0000;
cvt.rzi.u32.f32 %r49, %f38;
cvt.s64.s32 %rd11, %r48;
cvta.to.global.u64 %rd12, %rd5;
add.s64 %rd13, %rd12, %rd11;
st.global.u8 [%rd13], %r49;
mul.f32 %f39, %f35, 0f437F0000;
cvt.rzi.u32.f32 %r50, %f39;
cvta.to.global.u64 %rd14, %rd6;
add.s64 %rd15, %rd14, %rd11;
st.global.u8 [%rd15], %r50;
// Neighbourhood setup: %r3 = x - 1, %r4 = y - 1.
// OR-ing two signed values and testing "< 0" detects whether either one is
// negative. %p8 (x > param_10) and %p10 (y > param_11) are reused by later
// taps; given the guard above, both are false on this path.
// %f161 = running sum of distances, %r68 = number of taps accumulated.
add.s32 %r3, %r1, -1;
cvt.rn.f32.s32 %f3, %r3;
add.s32 %r4, %r2, -1;
or.b32 %r51, %r4, %r3;
setp.lt.s32 %p7, %r51, 0;
mov.u32 %r68, 0;
setp.gt.s32 %p8, %r1, %r36;
or.pred %p9, %p8, %p7;
setp.gt.s32 %p10, %r2, %r37;
or.pred %p11, %p10, %p9;
mov.f32 %f161, 0f00000000;
// Every tap below fetches param_1 once and computes
//   d = sqrt(((c0*255 - param_13)^2 + (c1*255 - param_14)^2) / 130050)
// then adds d to %f161 and increments %r68.
// Tap (x-1, y-1): skipped when %p11 is set.
@%p11 bra $L__BB1_4;
cvt.rn.f32.s32 %f40, %r4;
tex.2d.v4.f32.f32 {%f41, %f42, %f43, %f44}, [%rd3, {%f3, %f40}];
mul.f32 %f45, %f41, 0f437F0000;
sub.f32 %f46, %f45, %f24;
mul.f32 %f47, %f42, 0f437F0000;
sub.f32 %f48, %f47, %f25;
mul.f32 %f49, %f48, %f48;
fma.rn.f32 %f50, %f46, %f46, %f49;
div.rn.f32 %f51, %f50, 0f47FE0100;
sqrt.rn.f32 %f52, %f51;
add.f32 %f161, %f52, 0f00000000;
mov.u32 %r68, 1;
$L__BB1_4:
// Tap (x-1, y): skipped when x-1 or y is negative, or %p8.
or.b32 %r53, %r2, %r3;
setp.lt.s32 %p12, %r53, 0;
or.pred %p14, %p8, %p12;
@%p14 bra $L__BB1_6;
tex.2d.v4.f32.f32 {%f53, %f54, %f55, %f56}, [%rd3, {%f3, %f1}];
mul.f32 %f57, %f53, 0f437F0000;
sub.f32 %f58, %f57, %f24;
mul.f32 %f59, %f54, 0f437F0000;
sub.f32 %f60, %f59, %f25;
mul.f32 %f61, %f60, %f60;
fma.rn.f32 %f62, %f58, %f58, %f61;
div.rn.f32 %f63, %f62, 0f47FE0100;
sqrt.rn.f32 %f64, %f63;
add.f32 %f161, %f161, %f64;
add.s32 %r68, %r68, 1;
$L__BB1_6:
// Tap (x-1, y+1): %r8 = y + 1; skipped when y+1 >= param_11 (%p18),
// when x-1 or y+1 is negative, or %p8.
add.s32 %r8, %r2, 1;
or.b32 %r54, %r8, %r3;
setp.lt.s32 %p15, %r54, 0;
or.pred %p17, %p8, %p15;
setp.ge.s32 %p18, %r8, %r37;
or.pred %p19, %p18, %p17;
@%p19 bra $L__BB1_8;
cvt.rn.f32.s32 %f65, %r8;
tex.2d.v4.f32.f32 {%f66, %f67, %f68, %f69}, [%rd3, {%f3, %f65}];
mul.f32 %f70, %f66, 0f437F0000;
sub.f32 %f71, %f70, %f24;
mul.f32 %f72, %f67, 0f437F0000;
sub.f32 %f73, %f72, %f25;
mul.f32 %f74, %f73, %f73;
fma.rn.f32 %f75, %f71, %f71, %f74;
div.rn.f32 %f76, %f75, 0f47FE0100;
sqrt.rn.f32 %f77, %f76;
add.f32 %f161, %f161, %f77;
add.s32 %r68, %r68, 1;
$L__BB1_8:
// Tap (x, y-1): skipped when x or y-1 is negative, or %p10.
or.b32 %r55, %r4, %r1;
setp.lt.s32 %p20, %r55, 0;
or.pred %p22, %p10, %p20;
@%p22 bra $L__BB1_10;
cvt.rn.f32.s32 %f78, %r4;
tex.2d.v4.f32.f32 {%f79, %f80, %f81, %f82}, [%rd3, {%f2, %f78}];
mul.f32 %f83, %f79, 0f437F0000;
sub.f32 %f84, %f83, %f24;
mul.f32 %f85, %f80, 0f437F0000;
sub.f32 %f86, %f85, %f25;
mul.f32 %f87, %f86, %f86;
fma.rn.f32 %f88, %f84, %f84, %f87;
div.rn.f32 %f89, %f88, 0f47FE0100;
sqrt.rn.f32 %f90, %f89;
add.f32 %f161, %f161, %f90;
add.s32 %r68, %r68, 1;
$L__BB1_10:
// Tap (x, y), the centre: skipped only when x or y is negative.
or.b32 %r56, %r2, %r1;
setp.lt.s32 %p23, %r56, 0;
@%p23 bra $L__BB1_12;
tex.2d.v4.f32.f32 {%f91, %f92, %f93, %f94}, [%rd3, {%f2, %f1}];
mul.f32 %f95, %f91, 0f437F0000;
sub.f32 %f96, %f95, %f24;
mul.f32 %f97, %f92, 0f437F0000;
sub.f32 %f98, %f97, %f25;
mul.f32 %f99, %f98, %f98;
fma.rn.f32 %f100, %f96, %f96, %f99;
div.rn.f32 %f101, %f100, 0f47FE0100;
sqrt.rn.f32 %f102, %f101;
add.f32 %f161, %f161, %f102;
add.s32 %r68, %r68, 1;
$L__BB1_12:
// Tap (x, y+1): skipped when y+1 >= param_11, or x or y+1 is negative.
or.b32 %r57, %r8, %r1;
setp.lt.s32 %p24, %r57, 0;
or.pred %p26, %p18, %p24;
@%p26 bra $L__BB1_14;
cvt.rn.f32.s32 %f103, %r8;
tex.2d.v4.f32.f32 {%f104, %f105, %f106, %f107}, [%rd3, {%f2, %f103}];
mul.f32 %f108, %f104, 0f437F0000;
sub.f32 %f109, %f108, %f24;
mul.f32 %f110, %f105, 0f437F0000;
sub.f32 %f111, %f110, %f25;
mul.f32 %f112, %f111, %f111;
fma.rn.f32 %f113, %f109, %f109, %f112;
div.rn.f32 %f114, %f113, 0f47FE0100;
sqrt.rn.f32 %f115, %f114;
add.f32 %f161, %f161, %f115;
add.s32 %r68, %r68, 1;
$L__BB1_14:
// Tap (x+1, y-1): %r17 = x + 1; skipped when x+1 >= param_10 (%p27),
// when x+1 or y-1 is negative, or %p10.
add.s32 %r17, %r1, 1;
setp.ge.s32 %p27, %r17, %r36;
cvt.rn.f32.s32 %f16, %r17;
or.b32 %r58, %r4, %r17;
setp.lt.s32 %p28, %r58, 0;
or.pred %p29, %p27, %p28;
or.pred %p31, %p10, %p29;
@%p31 bra $L__BB1_16;
cvt.rn.f32.s32 %f116, %r4;
tex.2d.v4.f32.f32 {%f117, %f118, %f119, %f120}, [%rd3, {%f16, %f116}];
mul.f32 %f121, %f117, 0f437F0000;
sub.f32 %f122, %f121, %f24;
mul.f32 %f123, %f118, 0f437F0000;
sub.f32 %f124, %f123, %f25;
mul.f32 %f125, %f124, %f124;
fma.rn.f32 %f126, %f122, %f122, %f125;
div.rn.f32 %f127, %f126, 0f47FE0100;
sqrt.rn.f32 %f128, %f127;
add.f32 %f161, %f161, %f128;
add.s32 %r68, %r68, 1;
$L__BB1_16:
// Tap (x+1, y): skipped when x+1 >= param_10, or x+1 or y is negative.
or.b32 %r59, %r2, %r17;
setp.lt.s32 %p32, %r59, 0;
or.pred %p34, %p27, %p32;
@%p34 bra $L__BB1_18;
tex.2d.v4.f32.f32 {%f129, %f130, %f131, %f132}, [%rd3, {%f16, %f1}];
mul.f32 %f133, %f129, 0f437F0000;
sub.f32 %f134, %f133, %f24;
mul.f32 %f135, %f130, 0f437F0000;
sub.f32 %f136, %f135, %f25;
mul.f32 %f137, %f136, %f136;
fma.rn.f32 %f138, %f134, %f134, %f137;
div.rn.f32 %f139, %f138, 0f47FE0100;
sqrt.rn.f32 %f140, %f139;
add.f32 %f161, %f161, %f140;
add.s32 %r68, %r68, 1;
$L__BB1_18:
// Tap (x+1, y+1): skipped when x+1 >= param_10, y+1 >= param_11, or
// either coordinate is negative.
or.b32 %r60, %r8, %r17;
setp.lt.s32 %p35, %r60, 0;
or.pred %p37, %p27, %p35;
or.pred %p39, %p18, %p37;
@%p39 bra $L__BB1_20;
cvt.rn.f32.s32 %f141, %r8;
tex.2d.v4.f32.f32 {%f142, %f143, %f144, %f145}, [%rd3, {%f16, %f141}];
mul.f32 %f146, %f142, 0f437F0000;
sub.f32 %f147, %f146, %f24;
mul.f32 %f148, %f143, 0f437F0000;
sub.f32 %f149, %f148, %f25;
mul.f32 %f150, %f149, %f149;
fma.rn.f32 %f151, %f147, %f147, %f150;
div.rn.f32 %f152, %f151, 0f47FE0100;
sqrt.rn.f32 %f153, %f152;
add.f32 %f161, %f161, %f153;
add.s32 %r68, %r68, 1;
$L__BB1_20:
// %r24 = param_7 / param_10 (signed integer division): replication factor
// for the final store loop.
// %f23 = sum / count, with the divisor forced to 9.0 when no tap was
// accumulated (count <= 0).
div.s32 %r24, %r33, %r36;
cvt.rn.f32.s32 %f154, %r68;
setp.gt.s32 %p40, %r68, 0;
selp.f32 %f155, %f154, 0f41100000, %p40;
div.rn.f32 %f23, %f161, %f155;
// Choose ramp or hard threshold depending on param_16 > ~1e-4.
setp.gt.f32 %p41, %f27, 0f38D1B717;
@%p41 bra $L__BB1_22;
bra.uni $L__BB1_21;
$L__BB1_22:
// Ramp: %rs10 = trunc(clamp((avg - param_15) / param_16, 0, 1) * 255).
sub.f32 %f156, %f23, %f26;
div.rn.f32 %f157, %f156, %f27;
cvt.sat.f32.f32 %f158, %f157;
mul.f32 %f159, %f158, 0f437F0000;
cvt.rzi.u32.f32 %r61, %f159;
cvt.u16.u32 %rs10, %r61;
bra.uni $L__BB1_23;
$L__BB1_21:
// Hard threshold: %rs10 = all ones when avg >= param_15 (setp.geu is also
// true for unordered/NaN operands), otherwise 0. Only the low byte is
// stored later, i.e. 0xFF or 0x00.
setp.geu.f32 %p42, %f23, %f26;
selp.b16 %rs10, -1, 0, %p42;
$L__BB1_23:
// Nothing to write when the replication factor %r24 is < 1.
setp.lt.s32 %p43, %r24, 1;
@%p43 bra $L__BB1_30;
// Replicate %rs10 into a %r24 x %r24 square of param_6 whose origin is
// (%r24 * x, %r24 * y); %r76 / %r77 are the column / row offsets.
// NOTE(review): both counters are kept in 16-bit registers and masked to
// 8 bits (and ..., 255) before being compared with %r24, so they wrap at
// 256; a factor above 255 could never satisfy the exit condition.
mul.lo.s32 %r25, %r24, %r1;
mul.lo.s32 %r26, %r24, %r2;
cvta.to.global.u64 %rd1, %rd7;
mov.u16 %rs8, 0;
mov.u32 %r62, 0;
mov.u32 %r76, %r62;
mov.u16 %rs11, %rs8;
$L__BB1_25:
// Outer loop: %r28 = destination column; reset the inner counter.
add.s32 %r28, %r76, %r25;
mov.u32 %r77, %r62;
mov.u16 %rs12, %rs8;
$L__BB1_26:
// Inner loop: %r30 = destination row; store only when the column is below
// param_7 and the row is below param_8.
add.s32 %r30, %r77, %r26;
setp.ge.s32 %p44, %r30, %r34;
setp.ge.s32 %p45, %r28, %r33;
or.pred %p46, %p45, %p44;
@%p46 bra $L__BB1_28;
// param_6[row * param_9 + column] = low byte of %rs10.
mad.lo.s32 %r64, %r30, %r35, %r28;
cvt.s64.s32 %rd16, %r64;
add.s64 %rd17, %rd1, %rd16;
st.global.u8 [%rd17], %rs10;
$L__BB1_28:
// Advance the inner (row) counter; repeat while %r24 > counter.
add.s16 %rs12, %rs12, 1;
cvt.u32.u16 %r65, %rs12;
and.b32 %r77, %r65, 255;
setp.gt.s32 %p47, %r24, %r77;
@%p47 bra $L__BB1_26;
// Advance the outer (column) counter; repeat while %r24 > counter.
add.s16 %rs11, %rs11, 1;
cvt.u32.u16 %r66, %rs11;
and.b32 %r76, %r66, 255;
setp.gt.s32 %p48, %r24, %r76;
@%p48 bra $L__BB1_25;
$L__BB1_30:
// Common exit for all early-outs and the normal path.
ret;
}