Aktív témák

  • P.H.

    senior tag

    Magyar módszer, legfejlebb 126x126-os mátrixokra, a mátrixelemek nagysága legfeljebb 00FFFFFFh. A sebességkülönbség kb. 3-szoros az új kód javára úgy, hogy a mátrix teljesen belefér (és belefért régen is) az L1D-be.

    Jelenlegi kód:
    pushad
    @@REDUCE_ROWS:
    mov esi,00FFFFFFh
    mov edi,eax
    mov edx,ebp
    @rowmin:
    mov eax,esi
    mov ebx,edi
    mov ecx,ebp
    @findrowmin:
    cmp eax,[edi]
    cmova eax,[edi]
    sub ecx,01h
    lea edi,[edi+04h]
    jnz @findrowmin
    cmp eax,esi
    mov ecx,ebp
    jz @@ABNORMAL_EXIT
    @decrow:
    xor edi,edi
    cmp [ebx],esi
    cmovbe edi,eax
    sub [ebx],edi
    sub ecx,01h
    lea ebx,[ebx+04h]
    jnz @decrow
    sub edx,01h
    mov edi,ebx
    jnz @rowmin
    jmp @@REDUCE_COLUMNS
    @@ABNORMAL_EXIT:
    add esp,20h
    xor eax,eax
    mov edx,7FFFFFFFh
    stc
    ret
    @@REDUCE_COLUMNS:
    mov ebx,ebp
    mov edi,[esp+__MTX]
    @colmin:
    mov eax,esi
    mov ecx,ebp
    @findcolmin:
    cmp eax,[edi]
    cmova eax,[edi]
    sub ecx,01h
    lea edi,[edi+ebp*04h]
    jnz @findcolmin
    neg ebp
    cmp eax,esi
    mov ecx,ebp
    jz @@ABNORMAL_EXIT
    @deccol:
    lea edi,[edi+ebp*04]
    xor edx,edx
    cmp [edi],esi
    cmovbe edx,eax
    sub [edi],edx
    add ecx,01h
    jnz @deccol
    neg ebp
    sub ebx,01h
    lea edi,[edi+04h]
    jnz @colmin
    @@INITMARKS:
    mov ebx,offset(MARKS)
    lea ecx,[ebp+ebp]
    xor eax,eax
    lea edi,[ebx+ebp]
    mov esi,[esp+__MTX]
    @clrmark:
    sub ecx,04h
    mov [ebx],eax
    lea ebx,[ebx+04h]
    jg @clrmark
    @@START0_SYSTEM:
    xor ebx,ebx
    lea esi,[esi+ebp*04h]
    add eax,01h
    xor ecx,ecx
    @start0system:
    sub esi,04h
    xor edx,edx
    sub ebx,eax
    push esi
    sub edx,ebp
    @findfree0:
    cmp [esi],eax
    jb @markitem
    @nxrow:
    add edx,eax
    lea esi,[esi+ebp*04h]
    jnz @findfree0
    jmp @nxcol
    @markitem:
    bts dword ptr [edi+edx],01h
    jc @nxrow
    add edx,ebp
    mov [esi+03h],al
    add ecx,eax
    mov [edi+edx],bl
    or [edi+ebx],al
    @nxcol:
    lea edx,[ebx+ebp]
    pop esi
    test edx,edx
    jnz @start0system
    mov [esp+__SYS0],ecx
    @@CLEARROWMARKS:
    cmp ebp,ecx
    jz @count_result_MTX
    @clear_row_mark:
    and [edi+ebx],al
    add ebx,eax
    jnz @clear_row_mark
    sub esp,_SAVE
    jmp @@2ND_STEP
    @@3RD_STEP:
    mov byte ptr [esi+03h],02h
    and byte ptr [edi+edx],11111110b
    @@2ND_STEP:
    xor ebx,ebx
    mov esi,[esp+_SAVE+__MTX]
    xor ecx,ecx
    mov edx,00FFFFFFh
    sub ebx,ebp
    @free0:
    sub ecx,ebp
    @freerow:
    test byte ptr [edi+ebx],02h
    jz @zeroinrow
    add ebx,01h
    lea esi,[esi+ebp*04h]
    jnz @freerow
    jmp @@5TH_STEP
    @zeroinrow:
    test byte ptr [edi+ecx],01h
    jnz @handlecol
    cmp edx,[esi]
    jbe @handlecol
    mov edx,[esi]
    test edx,edx
    jz @@DECIDE_NEXT_STEP
    add esp,_SAVE
    pushad
    @handlecol:
    add ecx,01h
    lea esi,[esi+04h]
    jnz @zeroinrow
    add ebx,01h
    jnz @free0
    @@5TH_STEP:
    xor ecx,ecx
    mov esi,[esp+_SAVE+__MTX]
    sub ebx,ebp
    @nx5row:
    sub ecx,ebp
    test byte ptr [edi+ebx],02h
    jz @decrease_row_free
    @increase_double_markeds:
    mov al,[esi+03h]
    and al,11111100b
    bt dword ptr [edi+ecx],00h
    sbb al,00h
    sbb eax,eax
    and eax,edx
    add [esi],eax
    add ecx,01h
    lea esi,[esi+04h]
    jnz @increase_double_markeds
    jmp @step5row
    @decrease_row_free:
    bt dword ptr [edi+ecx],00h
    mov al,[esi+03h]
    adc al,00h
    mov eax,00000000h
    cmovz eax,edx
    sub [esi],eax
    add ecx,01h
    lea esi,[esi+04h]
    jnz @decrease_row_free
    @step5row:
    add ebx,01h
    jnz @nx5row
    popad
    sub esp,20h
    @@DECIDE_NEXT_STEP:
    mov edx,0FFFFFF00h
    lea eax,[ebx+ebp]
    or byte ptr [edi+ebx],02h
    add dl,[edi+eax]
    jnz @@3RD_STEP
    @@4TH_STEP:
    mov edx,[esp+_SAVE+__MTX]
    add dword ptr [esp+_SAVE+__SYS0],01h
    @0colon:
    mov [edi+eax],cl
    add ecx,ebp
    mov byte ptr [esi+03h],01h
    xor eax,eax
    lea esi,[edx+ecx*04h]
    shl ecx,02h
    sub eax,ebp
    @search_star_in_column:
    test byte ptr [esi+03h],01h
    jz @nxstar
    cmp eax,ebx
    jnz @0_star
    @nxstar:
    add eax,01h
    lea esi,[esi+ebp*04h]
    jnz @search_star_in_column
    jmp @@1ST_STEP
    @0_star:
    mov ebx,eax
    mov byte ptr [esi+03h],00h
    add eax,ebp
    sub esi,ecx
    xor ecx,ecx
    mov byte ptr [edi+eax],00h
    sub ecx,ebp
    @search_colon_in_row:
    test byte ptr [esi+03h],02h
    jnz @0colon
    add ecx,01h
    lea esi,[esi+04h]
    jnz @search_colon_in_row
    @error:
    nop
    @@1ST_STEP:
    cmp ebp,[esp+_SAVE+__SYS0]
    jz @count_result_STACK
    lea edx,[edx+ebp*04h]
    push ebp
    @remove_row_marks_set_cols:
    lea esi,[edx-04h]
    sub edi,01h
    xor bl,bl
    mov ecx,ebp
    sub edx,04h
    @chkstarmark:
    mov al,[esi+03h]
    and al,11111101b
    mov [esi+03h],al
    and al,01h
    or bl,al
    sub ecx,01h
    lea esi,[esi+ebp*04h]
    jnz @chkstarmark
    sub dword ptr [esp],01h
    mov [edi],bl
    jnz @remove_row_marks_set_cols
    pop ebx
    add edi,ebp
    jmp @@2ND_STEP
    @count_result_STACK:
    add esp,_SAVE
    @count_result_MTX:
    xor ecx,ecx
    xor eax,eax
    mov esi,[esp+__SAVE]
    mov ebx,[esp+__MARKS]
    @results:
    movsx edx,byte ptr [edi+ecx]
    mov byte ptr [edi+ecx],00h
    add ecx,01h
    add edx,ebp
    add eax,[esi+edx*04h]
    cmp ecx,ebp
    mov [ebx],dl
    lea esi,[esi+ebp*04h]
    lea ebx,[ebx+01h]
    jnz @results

    6-7 évvel ezelőtti kód:
    mov ecx,[mtxdblsize]
    mov edi,[marks]
    xor eax,eax
    mov ebp,[mtxsize]
    mov edx,ecx
    rep stosd
    lea edi,[tempmarks]
    mov ecx,edx
    rep stosd

    @reduce_rows_with_minimum:
    mov edi,[mtx]
    mov edx,ebp
    mov esi,00FFFFFFh
    @row_minimum:
    mov ecx,ebp
    mov ebx,edi
    mov eax,esi
    mov ch,cl
    @find_row_minimum:
    scasd
    jb @after_row_test
    mov eax,[edi-04h]
    @after_row_test:
    dec cl
    jnz @find_row_minimum
    @start_reduce_row:
    test eax,eax
    jnz @force_reduce_row
    dec edx
    jnz @row_minimum
    jmp @reduce_columns_with_m
    @force_reduce_row:
    cmp eax,esi
    jz @abnormal_exit
    @reduce_row_elements:
    cmp [ebx],esi
    ja @after_row_reduction
    sub [ebx],eax
    @after_row_reduction:
    add ebx,04h
    dec ch
    jnz @reduce_row_elements
    dec edx
    jnz @row_minimum

    @reduce_columns_with_minimum
    mov esi,[mtx]
    mov ebx,ebp
    lea edi,[ebp*04h]
    @column_minimum:
    mov ecx,ebp
    mov eax,00FFFFFFh
    mov edx,esi
    mov ch,cl
    @find_column_minimum:
    cmp [esi],eax
    ja @after_column_test
    mov eax,[esi]
    test eax,eax
    jz @next_decrease_column
    @after_column_test:
    add esi,edi
    dec cl
    jnz @find_column_minimum
    @start_reduce_column:
    cmp eax,00FFFFFFh
    jz @abnormal_exit
    mov esi,edx
    @reduce_column_elements:
    cmp dword ptr [esi],00FFFF
    ja @after_column_reduction
    sub [esi],eax
    @after_column_reduction:
    add esi,edi
    dec ch
    jnz @reduce_column_element
    @next_decrease_column:
    lea esi,[edx+04h]
    dec ebx
    jnz @column_minimum

    @determine_start0_system:
    mov [sys0],ebx
    mov edi,[marks]
    mov ebx,ebp
    mov eax,0102h
    mov esi,[mtx]
    xor ecx,ecx
    dec ebx
    push esi
    xor edx,edx
    @start_0system:
    lea esi,[esi+ebx*04h]
    @find_free0:
    test [edi+edx],al
    jnz @check_next_item
    cmp [esi],ecx
    jnz @check_next_item
    or [edi+edx],al
    or [edi+ebx],ah
    or [esi+03h],ah
    inc [sys0]
    jmp @next_column
    @check_next_item:
    inc edx
    lea esi,[esi+ebp*04h]
    cmp edx,ebp
    jnz @find_free0
    @next_column:
    mov esi,[esp]
    xor edx,edx
    dec ebx
    jns @start_0system
    pop ebx

    cmp ebp,[sys0]
    jz @count_result
    mov ecx,[mtxdblsize]
    mov eax,01010101h
    @clear_line_marks:
    and [edi],eax
    add edi,04h
    dec ecx
    jnz @clear_line_marks
    pushad
    jmp @@2nd_step
    @@1st_step:
    mov edi,[marks]
    cmp ebp,[sys0]
    jz @count_result_with_POPA
    mov ebx,ebp
    add edi,ebp
    mov al,0FDh
    lea edx,[edx+ebx*04h-01h]
    dec edi
    @remove_line_marks_set_cols:
    mov esi,edx
    mov [edi],bh
    mov ecx,ebp
    @check_star_mark:
    and [esi],al
    jz @marks_tested
    mov ah,[esi]
    and ah,01h
    or [edi],ah
    @marks_tested:
    lea esi,[esi+ebp*04h]
    dec ecx
    jnz @check_star_mark
    @star_next_column:
    sub edx,04h
    dec edi
    dec ebx
    jnz @remove_line_marks_set
    @@2nd_step:
    mov esi,[mtx]
    mov ecx,ebp
    mov ebx,[marks]
    mov edx,00FFFFFFh
    @search_free0:
    mov edi,[marks]
    xor eax,eax
    mov ch,byte ptr [mtxsize]
    @search_free0_prev_marked:
    test byte ptr [ebx],02h
    jz @look_for_zero
    inc ebx
    dec cl
    jz @@5th_step
    lea esi,[esi+ebp*04h]
    jmp @search_free0_prev_mar
    @look_for_zero:
    test byte ptr [edi],01h
    jnz @handle_column_counter
    cmp edx,[esi]
    jle @handle_column_counter
    mov edx,[esi]
    test edx,edx
    jz @decide_next_step
    add esp,20h
    pushad
    jmp @step_next_item
    @handle_column_counter:
    test byte ptr [esi+03h],01
    jz @step_next_item
    mov eax,edi
    @step_next_item:
    add esi,04h
    inc edi
    dec ch
    jnz @look_for_zero
    inc ebx
    dec cl
    jnz @search_free0
    jmp @@5th_step
    @decide_next_step:
    mov edx,0102h
    or [ebx],dl
    test eax,eax
    jz @continue_search
    @@3rd_step:
    and byte ptr [eax],0FEh
    mov [esi+03h],dl
    jmp @@2nd_step
    @continue_search:
    mov ebx,ecx
    lea eax,[esi+03h]
    dec ch
    jz @@4th_step
    add esi,07h
    inc edi
    @search_0star:
    test [esi],dh
    jz @next_0star_test
    and byte ptr [edi],0FEh
    or [eax],dl
    jmp @@2nd_step
    @next_0star_test:
    add esi,04h
    inc edi
    dec ch
    jnz @search_0star
    @@4th_step:
    or [eax],dh
    xor ecx,ecx
    xor edx,edx
    inc [sys0]
    mov cl,bl
    mov edi,ebp
    mov dl,bh
    sub edi,ecx
    mov ecx,ebp
    mov eax,[mtx]
    lea ebx,[ebp*04h]
    sub ecx,edx
    mov dl,01h
    push eax
    @check_column:
    shl ecx,02h
    lea esi,[eax+ecx+03h]
    xor eax,eax
    @search_star_in_column:
    test [esi],dl
    jz @prepare_for_next_item
    cmp eax,edi
    jnz @found_0_star
    @prepare_for_next_item:
    inc eax
    add esi,ebx
    cmp ebp,eax
    jnz @search_star_in_column
    pop edx
    jmp @@1st_step
    @found_0_star:
    mov byte ptr [esi],00h
    sub esi,ecx
    mov edi,eax
    xor ecx,ecx
    mov eax,0102h
    @search_colon_in_row:
    test [esi],al
    jnz @found_0_colon
    inc ecx
    add esi,04h
    cmp ecx,ebp
    jnz @search_colon_in_row
    @found_0_colon:
    mov [esi],ah
    mov eax,[esp]
    mov dl,01h
    jmp @check_column
    @@5th_step:
    mov esi,[mtx]
    sub ebx,ebp
    mov ecx,ebp
    mov al,01h
    mov ebp,ebx
    add esi,03h
    @decrease_next_row:
    mov ch,[esp+08h]
    mov edi,ebp
    test byte ptr [ebx],02h
    jz @start_decrease_current
    mov ah,0FCh
    @increase_double_markeds:
    test [edi],al
    jz @step_next_element
    test [esi],ah
    jnz @step_next_element
    add [esi-03h],edx
    @step_next_element:
    add esi,04h
    inc edi
    dec ch
    jnz @increase_double_marke
    inc ebx
    dec cl
    jnz @decrease_next_row
    popad
    pushad
    jmp @decide_next_step
    @start_decrease_current_row:
    mov ah,0FFh
    @decrease_current_row:
    test [edi],al
    jnz @jump_next_element
    test [esi],ah
    jnz @jump_next_element
    sub [esi-03h],edx
    @jump_next_element:
    add esi,04h
    inc edi
    dec ch
    jnz @decrease_current_row
    @step_on_next_row:
    inc ebx
    dec cl
    jnz @decrease_next_row
    popad
    pushad
    jmp @decide_next_step
    @count_result_with_POPAD:
    add esp,20h
    @count_result:
    mov esi,[save]
    shl ebp,02h
    mov ebx,edi
    xor ecx,ecx
    mov edi,[mtx]
    mov eax,01000000h
    push edi
    mov edx,[sys0]
    @count_optimum:
    @test_elements:
    scasd
    jnz @test_elements
    @increase_optimum:
    sub edi,04h
    sub edi,[esp]
    add [esp],ebp
    add ecx,[esi+edi]
    add esi,ebp
    shr edi,02h
    mov [ebx],edi
    inc ebx
    dec edx
    mov edi,[esp]
    jnz @count_optimum
    @store_optimum:
    pop eax
    shr ebp,02h
    mov eax,ecx

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák