[Back to STRINGS SWAG index] [Back to Main SWAG index] [Original]
{
[marcus is looking for an algorithm, which handles finding strings like
german names, which sometimes are written with "umlauts" and sometimes
not]
the solution for your problem is the soundex-algo.:
if, for example you have to index a database on strings, you normally
would get an alphanumeric sequence by asciicode. instead the soundex will
sort your records on a more phonetic way:
}
(* procedure : soundex.pro
purpose : search for similar sounding strings
compiler : => tp 4.0
date : 14.07.91
*)
Type
Lstring = String[255];
Var
CK_Name1,CK_Name2 : Lstring;
{
convert str to uppercase, careful, doesn't work with umlauts
this function from swag does:
Function UpCaseStr(St : string) : String;
var
regs : registers;
begin
Regs.DS := Seg(st[1]);
Regs.DX := Ofs(st[1]);
Regs.CX := Length(st);
Regs.AX := $6521;
MsDos(Regs);
UpCaseStr := St;
end;
}
Procedure To_upper (Var str : Lstring);
Var
I : Integer;
Begin
For I := 1 to Length (str) do
str [I] := upcase (str[I]);
End { To_Upper };
{ remove all occurances of double letters like wie oo,tt,ee, etc. }
Procedure eliminate_doubles (Var str : lstring);
Var
I,J : Integer;
Begin
For I := 1 to Length (str) do
Begin
If str [I] = str [I + 1] then
Begin
For J := I + 1 to Length (str)-1 do
str [J] := str [J + 1];
End
End
End { eliminate_doubles };
{ Code 'Code' for soundex comparison }
Procedure Sound_Ex (var Code : Lstring);
Var
I : Integer;
Sndex : Lstring;
Begin
Sndex := '';
Sndex := Sndex + Code [1];
For I := 2 to Length (Code) do
Begin
Case Code [I] of
'B','F','P','V' : Sndex := Sndex + '1';
'C','G','J','K','Q','S','S','Z' : Sndex := Sndex + '2';
'D','T' : Sndex := Sndex + '3';
'L' : Sndex := Sndex + '4';
'M','N' : Sndex := Sndex + '5';
'R' : Sndex := Sndex + '6';
End { case };
End { For };
If Length (Sndex) > 4 then Sndex := Copy (Sndex,1,4);
If Length (Sndex) < 4 then
For I := Length (Sndex) to 3 do Sndex := Sndex + '0';
Code := Sndex;
End { Sound_Ex };
{**************************************************
* returns TRUE, if Name1 in Soundexcode *
* ressembles to Name2, returns falsch, if not *
**************************************************}
Function Sounds_Like (Name1,Name2 : Lstring) : Boolean;
Var
Tnam1,Tnam2 : Lstring;
Begin
Tnam1 := Name1;
Tnam2 := Name2;
To_Upper (Tnam1);
To_Upper (Tnam2);
eliminate_doubles (Tnam1);
eliminate_doubles (Tnam2);
Sound_Ex (Tnam1);
Sound_Ex (Tnam2);
Writeln;
Writeln ('> ',Tnam1,' <> ',tnam2,' <');
If Tnam1 = Tnam2 then
Sounds_Like := TRUE
Else
Sounds_Like := FALSE;
End { Sounds_Like };
{
*******************
* DEMO *
*******************
}
Begin
Write ('1. Name please: ');Readln (CK_Name1);
Write ('2. Name please: ');Readln (CK_Name2);
Writeln;Writeln;
Writeln (CK_Name1,' and ',CK_Name2);
If Sounds_Like (CK_Name1,CK_Name2) Then
Writeln (' sound ALIKE !')
Else
Writeln (' do not sound alike at all !');
End.
{
the used chars are languagedependant and should be used according to the
distribution of their occurances in the used language. you have to trick
around a bit with them, until you'll get the best result. those above
should work fine for the english language (which again is spoken in this
echo ;-) ).
}
[Back to STRINGS SWAG index] [Back to Main SWAG index] [Original]