Смотрите какую забавную штуку нашел:
- From
- Boris Rudakov (2:5054/9.4)
- To
- All ()
- Date
- 1996-07-19T18:02Z
- Area
- PERM.PROG
Hello All!
=== Cut ===
TextSearch, FastSearch, and FuzzySearch: Implement
Searching Algorithms
The TextSearch, FastSearch, and FuzzySearch samples
implement three searching algorithms:
> TextSearch uses the "brute-force" approach. It searches
a large text block for a specific pattern of characters by
comparing each character in the pattern with each
character in the text buffer.
> FastSearch uses the Shift-AND algorithm. It builds a
characteristic vector table (CVT) for a search pattern
that is up to 32 characters in length.
> FuzzySearch uses a variation of the Shift-AND algorithm
to perform approximate pattern matching. It ignores a
single character mismatch (for example, you can locate
"Karla" by typing "Carla" or "Karla"), but can be extended
for a specific number of mismatches.
TextSearch, FastSearch, and FuzzySearch are companion
files for "MS-DOS Q&A" by Jeff Prosise (Microsoft Systems
Journal, Vol. 8, No. 4). The source code for these
applications has not been modified or tested for the
Microsoft(R) Developer Network CD. The source files may
appear in various forms (source code only, ready to build,
or ready to run).
KEYWORDS: CD4
=== Cut ===
Первый алгоритм лажевенький, остальные - ничего так.
=== Cut ===
Figure 1
Search Procedure Utilizing a Brute-Force Approach
;; TextSearch uses a brute-force approach to searching a text buffer for
;; an exact pattern match.
;;
;; Call with: DS:SI -> Pattern
;; ES:DI -> Text buffer
;; BX = Pattern length
;; CX = Text length
;;
;; Returns: Carry clear if match found, carry set if not
TextSearch proc near
sub cx,bx ;Compute the max number of
inc cx ;passes through the loop
cld ;Clear direction flag
TS1: push cx ;Save registers
push si
push di
mov cx,bx ;CX = Pattern length
repe cmpsb ;Compare the strings
pop di ;Restore registers
pop si
pop cx
je TS2 ;Match found if Z=1
inc di ;Otherwise increment buffer
loop TS1 ;pointer and loop
stc ;Set carry and return
ret
TS2: clc ;Clear carry and return
ret
TextSearch endp
*****
Figure 2
Search Procedure Utilizing the Shift-AND Algorithm
;; BuildCVTable builds the characteristic vector table for a search pattern
;; that is up to 32 characters in length. No error checking is performed to
;; verify that the pattern length is greater than 0 and less than or equal
;; to 32.
;;
;; Call with: DS:SI -> Pattern
;; ES:DI -> Characteristic vector table
;; CX = Pattern length
;;
;; Note: The characteristic vector table should contain 256 double words.
BuildCVTable proc near
push cx ;Save CX
push di ;Save DI
cld ;Clear direction flag
xor ax,ax ;Initialize the CVT so
rep stosw ;it contains all zeroes
pop di ;Restore DI
pop cx ;Restore CX
mov dx,8000h ;Set DX:AX to 80000000h
xor ax,ax
BCVT1: xor bh,bh ;Initialize BH to 0
mov bl,[si] ;Get the next character
inc si ;Increment pointer
shl bx,1 ;Multiply BX by 4 to compute
shl bx,1 ;an index into the CVT
or es:[di+bx],ax ;Set the bit that corresponds
or es:[di+bx+2],dx ;to this position in the CVT
shr dx,1 ;Shift DX:AX right one bit
rcr ax,1
loop BCVT1 ;Loop until done
ret ;Return to caller
BuildCVTable endp
;; FastSearch uses the Shift-AND algorithm described by Manber and Wu
;; in the November 1992 issue of Byte magazine to search a text buffer
;; for an exact pattern match. No error checking is performed to verify
;; that the pattern length is valid, or that the length of the text
;; buffer equals or exceeds the pattern length.
;;
;; Call with: DS:SI -> Text to be searched
;; ES:DI -> Characteristic vector table
;; CX = Pattern length
;; BX = Text length
;;
;; Returns: Carry clear if match found, carry set if not
FastSearch proc near
mov dx,8000h ;Set DX:AX to 80000000h
xor ax,ax
dec cx ;Decrement pattern length
FS1: shr dx,1 ;Shift DX:AX right n-1 times,
rcr ax,1 ;where n equals the pattern
loop FS1 ;length
push dx ;Save DX:AX on the stack
push ax
mov bp,sp ;Copy SP to BP
mov dx,8000h ;Set DX:AX to 80000000h
xor ax,ax
mov cx,bx ;CX = Text length
FS2: xor bh,bh ;Initialize BH to 0
mov bl,[si] ;Get the next character
inc si ;Increment pointer
shl bx,1 ;Multiply BX by 4 to compute
shl bx,1 ;an index into the CVT
shr dx,1 ;Shift DX:AX right one bit
rcr ax,1
or dx,8000h ;Set the high bit of DX
and ax,es:[di+bx] ;AND DX:AX with the vector
and dx,es:[di+bx+2] ;for this character
push ax ;Save DX:AX
push dx
and ax,[bp] ;AND DX:AX with the value
and dx,[bp+2] ;saved on the stack at the
or ax,ax ;beginning of the routine
jnz FSExit
or dx,dx ;Match found if DX:AX is
jnz FSExit ;not equal to 0
pop dx ;Restore DX:AX
pop ax
loop FS2 ;Loop until done
add sp,4 ;Clear the stack
stc ;Set the carry flag
ret ;Return to caller
FSExit: add sp,8 ;Clear the stack
clc ;Clear the carry flag
ret ;Return to caller
FastSearch endp
*****
Figure 3
Modified Shift-AND with One Substitution Permitted
;; FuzzySearch uses a variation of the Shift-AND algorithm to search a text
;; buffer for an approximate pattern match (one substitution permitted). No
;; error checking is performed to verify that the pattern length is valid,
;; or that the length of the text buffer equals or exceeds the pattern
;; length.
;;
;; Call with: DS:SI -> Text to be searched
;; ES:DI -> Characteristic vector table
;; CX = Pattern length
;; BX = Text length
;;
;; Returns: Carry clear if match found, carry set if not
FuzzySearch proc near
mov dx,8000h ;Set DX:AX to 80000000h
xor ax,ax
push dx ;Save DX:AX on the stack
push ax
dec cx ;Decrement pattern length
FS1: shr dx,1 ;Shift DX:AX right n-1 times,
rcr ax,1 ;where n equals the pattern
loop FS1 ;length
push dx ;Save DX:AX on the stack
push ax
mov bp,sp ;Copy SP to BP
mov dx,8000h ;Set DX:AX to 80000000h
xor ax,ax
mov cx,bx ;CX = Text length
FS2: xor bh,bh ;Initialize BH to 0
mov bl,[si] ;Get the next character
inc si ;Increment pointer
shl bx,1 ;Multiply BX by 4 to compute
shl bx,1 ;an index into the CVT
shr dx,1 ;Shift DX:AX right one bit
rcr ax,1
push dx ;Save DX:AX on the stack
push ax
mov ax,[bp+4] ;Load DX:AX with value saved
mov dx,[bp+6] ;on the top of the stack
shr dx,1 ;Shift DX:AX right one bit
rcr ax,1
and ax,es:[di+bx] ;AND DX:AX with the vector
and dx,es:[di+bx+2] ;for this character
or ax,[bp-4] ;OR DX:AX with the value
or dx,[bp-2] ;pushed on the stack
or dx,8000h ;Set the high bit of DX
mov [bp+4],ax ;Place the result back on
mov [bp+6],dx ;the stack
and ax,[bp] ;AND DX:AX with the value
and dx,[bp+2] ;saved on the stack at the
or ax,ax ;beginning of the routine
jnz FSExit
or dx,dx ;Match found if DX:AX is
jnz FSExit ;not equal to 0
pop ax ;Restore DX:AX
pop dx
or dx,8000h ;Set the high bit of DX
and ax,es:[di+bx] ;AND DX:AX with the vector
and dx,es:[di+bx+2] ;for this character
loop FS2 ;Loop until done
add sp,8 ;Clear the stack
stc ;Set the carry flag
ret ;Return to caller
FSExit: add sp,12 ;Clear the stack
clc ;Clear the carry flag
ret ;Return to caller
FuzzySearch endp
=== Cut ===
Boris Rudakov, Кстати, моя фамилия - Ржевский.
BBR
--- Be happy: BBR is looking at you !
* Origin: АлкАголь малыми дозами безвреден в любых количествах (2:5054/9.4)