Aktív témák

  • P.H.

    senior tag

    Critical path kitakarítva, avagy egy DIV sok store-t győz.

    Counter 0 : Non-halted clock cycles
    Counter 1 : Retired instructions
    Counter 2 : Instructions per cycle (IPC)
    Counter 3 : L1 Data cache refill from RAM

    223800 2616.4 5604.4 2.1 0.0
    223850 2625.1 5869.5 2.2 0.0
    223900 2607.2 5824.5 2.2 0.0
    223950 2624.8 5906.3 2.3 0.0
    224000 2621.1 5887.6 2.2 0.0
    224050 2625.6 5880.5 2.2 0.0
    224100 2605.2 5845.0 2.2 0.0
    224150 2625.5 5871.4 2.2 0.0
    224200 2621.6 5850.6 2.2 0.0
    224250 2625.0 5846.2 2.2 0.0
    224300 2605.7 5783.7 2.2 0.0
    224350 2626.4 5367.4 2.0 0.0
    224400 2607.9 5825.7 2.2 0.0
    224450 2625.3 5696.0 2.2 0.0
    224500 2611.4 5796.3 2.2 0.0
    224550 2623.1 5902.6 2.3 0.0
    224600 2621.8 5889.7 2.2 0.0
    224650 2625.6 5879.4 2.2 0.0
    224700 2612.9 5871.2 2.2 0.0
    224750 2620.6 5908.6 2.3 0.0
    224800 2622.8 5893.5 2.2 0.0

    A kód, zárójelben a critical path-ok AMD-s integer port-leosztásával:

    // Routine operating on a square matrix of dwords (rows and columns are both
    // counted with ebp).  NOTE(review): the structure (row/column reduction,
    // __0STAR / __COLON bookkeeping, numbered steps) looks like the Hungarian
    // (Munkres) assignment algorithm - confirm.
    //
    // The {0}/{1}/{2} tags in the hot loops are the author's AMD integer-port
    // assignment for the critical path.
    //
    // Symbols __SYS0, __MTX, __FREE0, __SAVE, __MARKS (esp-relative slots) and
    // __0STAR, __COLON (edi-relative displacements) are defined outside this
    // listing.  TODO confirm whether the esp slots alias the pushad frame.
    //
    // Conventions used throughout:
    //   ebp  = -(4 * count): negated byte length of one row; loop counters start
    //          at ebp and step by +4 until they reach zero.
    //   edi  = ebx_on_entry + 4*count; [edi + negative offset] holds one mark
    //          dword per row/column, [edi + pos*2 + __0STAR/__COLON] holds an
    //          8-byte record per row.
    //   Byte 3 of a matrix dword is used as a flag byte (02h and 08h are written
    //   below); only elements whose byte 3 is zero are reduced.
    //
    // eax takes the value of edi; eax is the running element pointer of the
    // reduction passes.  assumes edi points at the matrix on entry - TODO confirm
    mov eax,edi
    // Pushes 20h bytes of registers; both exits drop them with add esp,20h
    // instead of popad, so the caller's registers are not restored.
    pushad
    shl ebp,02h
    xor ecx,ecx
    // edx = 3 * row byte length = size of the work area that is cleared below.
    lea edx,[ebp+ebp*02h]
    lea edi,[ebx+ebp]
    neg ebp
    // Zero the work area at ebx from the top down (loops while edx > 0).
    @mark0:
    sub edx,04h
    mov [ebx+edx],ecx
    jg @mark0
    // ---- Row reduction: ebx = row counter, eax walks the matrix row by row ----
    @@REDUCE_ROWS:
    mov ebx,ebp
    @rowmin:
    // esi = running minimum, seeded with 02000000h; edx = column offset of the
    // last element that compared equal to the running minimum (0 if none).
    mov esi,02000000h
    mov ecx,ebp
    xor edx,edx
    @findrowmin:
    cmp esi,[eax]
    cmovz edx,ecx
    // Unsigned compare: take the element when it is below the running minimum.
    cmova esi,[eax]
    add ecx,04h
    // lea keeps the flags of the add for the jnz.
    lea eax,[eax+04h]
    jnz @findrowmin
    // ecx was 0 here, so it becomes the positive row byte length.
    sub ecx,ebp
    // Minimum still at the seed: no element was below 02000000h.
    cmp esi,02000000h
    jz @specific
    // Rewind eax to the start of the row just scanned.
    add eax,ebp
    @subrow:
    // Subtract the row minimum from every element whose flag byte is zero.
    xor edx,edx
    cmp byte ptr [eax+03h],00h
    cmovz edx,esi
    sub [eax],edx
    sub ecx,04h
    lea eax,[eax+04h]
    jnz @subrow
    jmp @reducenxrow
    @specific:
    // edx = 0 means no element equalled 02000000h either: give up.
    test edx,edx
    jz @@ABNORMAL_EXIT
    // Set bit 0 of the column's mark dword; if it was already set, give up.
    bts dword ptr [edi+edx],00h
    jnc @mark
    // Failure exit: returns eax = 0, edx = 7FFFFFFFh, CF = 1.
    @@ABNORMAL_EXIT:
    add esp,20h
    xor eax,eax
    mov edx,7FFFFFFFh
    stc
    ret
    @mark:
    // ecx = positive byte offset of the current row.
    add ecx,ebx
    // Decrement the counter at __SYS0; its zero flag survives the two movs
    // and decides the jz below.
    sub dword ptr [esp+__SYS0],01h
    // Set byte 2 (bit 16) of the row's mark dword and record the column offset
    // in the row's __0STAR slot.
    mov byte ptr [edi+ebx+02h],01h
    mov [edi+ecx*02h+__0STAR],edx
    jz @count_result_STACK
    @reducenxrow:
    add ebx,04h
    jnz @rowmin
    // ---- Column reduction: columns are visited from last to first; eax starts
    // one dword past the matrix and is stepped back with ebx. ----
    @@RECUDE_COLUMNS:
    sub ebx,04h
    sub eax,04h
    cmp ebx,ebp
    jl @@2ND_STEP
    // Skip columns whose mark bit 0 is already set.
    test byte ptr [edi+ebx],01h
    jnz @@RECUDE_COLUMNS
    mov edx,02000000h
    mov ecx,ebp
    @findcolmin:
    // Unsigned minimum of the column, walking upwards one row per step.
    cmp edx,[eax]
    cmova edx,[eax]
    add eax,ebp
    add ecx,04h
    jnz @findcolmin
    cmp edx,02000000h
    // lea keeps the flags of the cmp for the jz.
    lea ecx,[ebp-04h]
    jz @@ABNORMAL_EXIT
    @subcol:
    // Walk back down the column; ecx = row offset, 0 ends the column.
    xor esi,esi
    add ecx,04h
    jz @@RECUDE_COLUMNS
    sub eax,ebp
    cmp byte ptr [eax+03h],00h
    cmovz esi,edx
    sub [eax],esi
    // Only an element that became zero is examined further.
    jnz @subcol
    // Test-and-set bit 16 of the row's mark dword; skip if it was already set.
    // NOTE(review): the bit is set before the column test below, so it stays
    // set even when that test rejects the zero - confirm this is intended.
    bts dword ptr [edi+ecx],10h
    jc @subcol
    // Test-and-set bit 0 of the column's mark dword (mov keeps CF for the jc).
    bts dword ptr [edi+ebx],00h
    mov esi,ecx
    jc @subcol
    // esi = positive row offset; flag the element with 02h and record the
    // column in the row's __0STAR slot.  The jnz tests the __SYS0 decrement.
    sub esi,ebp
    sub dword ptr [esp+__SYS0],01h
    mov byte ptr [eax+03h],02h
    mov [edi+esi*02h+__0STAR],ebx
    jnz @subcol
    jmp @count_result_STACK

    // Entered from @@DECIDE_NEXT_STEP with esi = element, ebx = row offset,
    // eax = positive row offset, ecx = column offset, edx = the row's __0STAR
    // value.  Flags the element with 08h, sets the row's mark byte 3 to 0FFh,
    // clears mark byte 0 of column edx and records ecx in the row's __COLON slot.
    @@3RD_STEP:
    mov byte ptr [esi+03h],08h
    mov byte ptr [edi+ebx+03h],0FFh
    mov byte ptr [edi+edx],00h
    mov [edi+eax*02h+__COLON],ecx
    // ---- Scan from [esp+__MTX]: eax = row counter, ecx = column counter,
    // esi = element pointer, edx = running bound (starts at 00FFFFFFh). ----
    @@2ND_STEP:
    mov eax,ebp
    mov esi,[esp+__MTX]
    mov edx,00FFFFFFh
    @nx2row:
    // bh = byte 3 of the row's mark dword.
    mov bh,[edi+eax+03h]
    mov ecx,ebp
    @zeroinrow:
    // CF = 1 when the element is above edx (unsigned).  bl = row byte 3 minus
    // column byte 0 minus CF; with the mark values written in this listing
    // (0 / 0FFh for rows, 0 / 1 for columns) it is zero only when both mark
    // bytes are zero and the element is <= edx.
    {0} cmp edx,[esi]
    {1} mov bl,bh
    {2} sbb bl,[edi+ecx]
    {0} jz @minimum
    @nx2col:
    {1} add ecx,04h
    {2} lea esi,[esi+04h]
    {0} jnz @zeroinrow
    add eax,04h
    jnz @nx2row
    // ---- Reached when the scan above ends without branching to
    // @@DECIDE_NEXT_STEP; edx holds the last value loaded at @minimum. ----
    @@5TH_STEP:
    mov ebx,ebp
    mov esi,[esp+__MTX]
    @nx5row:
    // ecx is 0 here (the column loop has just ended), so ecx = -edx.  The sign
    // of edx is made equal to the sign bit of the row's mark dword: edx is
    // negated whenever the two signs differ.
    mov eax,edx
    sub ecx,edx
    xor eax,[edi+ebx]
    cmovs edx,ecx
    mov ecx,ebp
    @decrease_row_free:
    // CF = column mark bit 0; al = element byte 3 + row byte 3 + CF.  When that
    // sum is zero (mod 256) edx is subtracted from the element, otherwise 0.
    // mov eax,0 (not xor) is used so the flags of the adc reach the cmovz.
    {0} bt dword ptr [edi+ecx],00h
    {1} mov al,[esi+03h]
    {2} adc al,[edi+ebx+03h]
    {0} mov eax,00000000h
    {1} cmovz eax,edx
    {2} sub [esi],eax
    {0} add ecx,04h
    {1} lea esi,[esi+04h]
    {2} jnz @decrease_row_free
    add ebx,04h
    jnz @nx5row
    // Recover row and column of the element remembered in __FREE0 from its
    // byte offset within the matrix: divide by ebp (negative row length),
    // negate the quotient to get the row index, the remainder is the column
    // byte offset.  Then rebuild the negative-offset forms used elsewhere.
    mov eax,[esp+__FREE0]
    xor edx,edx
    mov esi,eax
    sub eax,[esp+__MTX]
    idiv ebp
    neg eax
    lea ecx,[ebp+edx]
    lea eax,[ebp+eax*04h]
    @minimum:
    // Remember the element pointer in __FREE0 and load its value into edx;
    // a non-zero value becomes the new bound and the scan continues.
    xor edx,edx
    mov [esp+__FREE0],esi
    add edx,[esi]
    jnz @nx2col
    // Element is zero: edx (0) + the row's __0STAR slot; non-zero slot -> 3rd step.
    @@DECIDE_NEXT_STEP:
    mov ebx,eax
    sub eax,ebp
    add edx,[edi+eax*02h+__0STAR]
    jnz @@3RD_STEP
    // ---- Alternating update: the current element gets flag 02h and its column
    // is stored in the row's __0STAR slot; if another row of that column carries
    // flag 02h, that flag is cleared and the walk continues at that row's
    // __COLON column. ----
    @@4TH_STEP:
    mov edx,[esp+__MTX]
    jmp @colon_to_star
    @0_star:
    // eax = row whose element in this column carried flag 02h: clear the flag,
    // then move esi to the element at that row's __COLON column.
    mov ebx,eax
    mov byte ptr [esi+03h],00h
    sub esi,ecx
    sub eax,ebp
    sub esi,ebp
    mov ecx,[edi+eax*02h+__COLON]
    lea esi,[esi+ecx]
    @colon_to_star:
    mov [edi+eax*02h+__0STAR],ecx
    // ecx = positive column byte offset; esi = first-row element of that column.
    sub ecx,ebp
    mov byte ptr [esi+03h],02h
    lea esi,[edx+ecx]
    mov eax,ebp
    @search_star_in_column:
    // Skip the row that was just flagged (ebx).
    cmp eax,ebx
    jz @nxstar
    test byte ptr [esi+03h],02h
    jnz @0_star
    @nxstar:
    // Next row of the same column.
    sub esi,ebp
    add eax,04h
    jnz @search_star_in_column
    // ---- Counter at __SYS0 is decremented; zero means finished (the two movs
    // keep the flags for the jz).  eax is 0 here because the search loop above
    // ran to completion. ----
    @@1ST_STEP:
    sub dword ptr [esp+__SYS0],01h
    mov ecx,ebp
    mov ebx,edi
    jz @count_result_STACK
    // Save the dword at [edi]; it is restored after the loop.
    // NOTE(review): presumably because the byte store to [edi+esi] below hits
    // [edi] when a __0STAR slot is 0 - confirm.
    push dword ptr [edi]
    @restructure:
    // Per row: edx advances by one row length (it starts at [esp+__MTX]);
    // the row's mark byte 3 is set to al (0); bit 3 (08h) is cleared in byte 3
    // of the dword at edx + __COLON value; the __COLON slot is reset to 0;
    // mark byte 0 of the column named by the __0STAR slot is set to 1.
    // NOTE(review): when the __COLON slot is 0 the and touches the dword at
    // edx itself, i.e. the first dword after that row - for the last row that
    // is past the scanned matrix; confirm this is safe.
    {0} mov esi,[ebx+__COLON]
    {1} sub edx,ebp
    {2} mov [edi+ecx+03h],al
    {0} and byte ptr [edx+esi+03h],11110111b
    {1} mov esi,[ebx+__0STAR]
    {2} add ecx,04h
    {0} mov [ebx+__COLON],eax
    {1} lea ebx,[ebx+08h]
    {2} mov byte ptr [edi+esi],01h
    {0} jnz @restructure
    pop dword ptr [edi]
    jmp @@2ND_STEP
    // ---- Result: ebp becomes the positive row length, eax accumulates a sum,
    // esi = [esp+__SAVE], ebx = [esp+__MARKS]; the pushad frame is dropped. ----
    @count_result_STACK:
    xor ecx,ecx
    neg ebp
    xor eax,eax
    mov esi,[esp+__SAVE]
    mov ebx,[esp+__MARKS]
    add esp,20h
    @results:
    // Per row: edx = __0STAR column offset converted to a positive byte offset;
    // the dword at [esi+edx] is added to eax; the column index (offset / 4) is
    // stored as one byte at ebx; esi moves on by one row length.
    // assumes __SAVE points at a copy of the input matrix - TODO confirm
    {0} mov edx,[edi+ecx*02h+__0STAR]
    {1} add ecx,04h
    {2} add edx,ebp
    {0} add eax,[esi+edx]
    {1} shr edx,02h
    {2} add esi,ebp
    {0} cmp ecx,ebp
    {1} mov [ebx],dl
    {2} lea ebx,[ebx+01h]
    {0} jnz @results

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák