{ [Back to FINDREPL SWAG index] [Back to Main SWAG index] [Original] }
{ BOB SWART
Here it is, all new and much faster. It uses an in-memory binary tree to manage
the taglines, so the number of taglines is limited only by the available RAM:
}
{$A+,B-,D-,E-,F-,G-,I-,L-,N-,O-,P-,Q-,R-,S+,T-,V-,X-}
{$M 16384,0,655360}
Uses
Crt;
Type
  { Text-file I/O buffer handed to SetTextBuf: exactly 16 KB (0..$3FFF),
    i.e. a multiple of 4K as recommended in the note at the end of the file. }
  TBuffer = Array[0..$3FFF] of Char;
Const
  { Banner and help text written to the screen by the main program. }
  Title = 'TagLines 0.2 by Bob Swart For Travis Griggs'#13#10;
  Usage = 'Usage: TagLines inFile outFile'#13#10#13#10+
          ' Taglines will remove duplicate lines from inFile.'#13#10+
          ' Resulting Text is placed in outFile.'#13#10;
  { Typed constants, i.e. initialised variables updated while the program runs. }
  NumLines: LongInt = 0; { total number of lines in InFile }
  NmLdiv80: LongInt = 0; { NumLines div 80, For 'progress' }
  CurrentL: LongInt = 0; { current lineno read from InFile }
Type
  String80 = String[80];       { one tagline; longer input is cut to 80 chars on read }
  PBinTree = ^TBinTree;
  { Node of the binary search tree that holds every distinct tagline seen. }
  TBinTree = Record
    Info: String80;            { the tagline stored in this node }
    left,right: PBinTree       { subtrees: smaller lines left, larger lines right }
  end;
Var
  InBuf,
  OutBuf   : TBuffer;          { buffers attached to InFile / OutFile }
  InFile,
  OutFile  : Text;
  TagLine  : String80;         { line most recently read from InFile }
  Root,                        { root of the tree, nil while it is empty }
  Current,                     { node being examined / newly created node }
  Prev     : PBinTree;         { parent of Current during the tree descent }
  i        : Integer;          { result of the last CompStr call }
  SaveExit : Pointer;          { previous ExitProc, restored by Stop }
Function CompStr(Var Name1,Name2: String): Integer; Assembler;
{ Author: drs. Robert E. Swart

  Compares two Pascal (length-prefixed) strings and returns
     0  when Name1 = Name2,
    -1  when Name1 < Name2,
     1  when Name1 > Name2.
  The common prefix (the shorter of the two lengths) is compared byte by
  byte using unsigned comparison; the first differing byte decides. When
  the whole common prefix matches, the length bytes decide, so the shorter
  string sorts first. Both strings are only read, never written.
  DS is saved on entry and restored on exit; the result is left in AX.
}
Asm
push DS
lds SI,Name1 { ds:si pts to Name1 }
les DI,Name2 { es:di pts to Name2 }
cld
lodsb { get String1 length in AL }
mov AH,ES:[DI] { get String2 length in AH }
inc DI
mov BX,AX { save both lengths in BX: BL = len1, BH = len2 }
xor CX,CX { clear cx }
mov CL,AL { get String1 length in CX }
cmp CL,AH { equal to String2 length? }
jb @Len { CX stores minimum length }
mov CL,AH { of String1 and String2 }
@Len: jcxz @Exit { nothing to compare: decide on the lengths in BX }
@Loop: lodsb { String1[i] in AL }
mov AH,ES:[DI] { String2[i] in AH }
cmp AL,AH { compare Str1 to Str2 }
jne @Not { mismatch: leave the loop, the two bytes decide }
inc DI
loop @Loop { go do next Char }
jmp @Exit { common prefix equal: BX still holds the lengths }
@Not: mov BX,AX { BL = AL = String1[i],
BH = AH = String2[i] }
@Exit: xor AX,AX { result starts at 0 }
cmp BL,BH { length or contents comp (unsigned) }
je @Equal { 1 = 2: return 0 }
jb @Lower { 1 < 2: return -1 }
inc AX { 1 > 2: return 1 }
inc AX { AX = 2 here; the dec below brings it to 1 }
@Lower: dec AX { 0 -> -1 for "lower", 2 -> 1 for "greater" }
@Equal: pop DS
end {CompStr};
Procedure Stop; Far;
{ Exit handler: restores the exit procedure that was saved in SaveExit and
  then closes both text files. Declared Far so its address can be stored
  in ExitProc. }
begin
  { Put the saved handler back first so the exit chain continues after us. }
  ExitProc := SaveExit;
  { Closing OutFile is what flushes any output still held in its buffer. }
  Close(InFile);
  Close(OutFile)
end {Stop};
begin
  { TagLines main program.
    Usage: TagLines inFile outFile
    Pass 1 counts the lines of inFile. Pass 2 reads every line, looks it up
    in an (unbalanced) binary search tree ordered by CompStr and, when it is
    not yet present, adds it to the tree and writes it to outFile. The result
    is inFile without duplicate lines, in the original order.
    Lines are read into a String80, so anything beyond 80 characters is cut
    off and does not take part in the comparison.
    Exit codes: 0 = done, usage shown, or interrupted by a key press,
                1 = inFile cannot be opened, 2 = outFile already exists,
                3 = outFile cannot be created, 4 = out of heap memory. }
  Writeln(Title);
  if ParamCount <> 2 then
  begin
    Writeln(Usage);
    Halt
  end;

  Assign(InFile, ParamStr(1));
  SetTextBuf(InFile, InBuf);
  Reset(InFile);
  if IOResult <> 0 then
  begin
    WriteLn('Error: could not open ', ParamStr(1));
    Halt(1)
  end;

  { Never overwrite an existing file: a Reset that succeeds means it exists. }
  Assign(OutFile, ParamStr(2));
  SetTextBuf(OutFile, OutBuf);
  Reset(OutFile);
  if IOResult = 0 then
  begin
    WriteLn('Error: File ', ParamStr(2),' already exists');
    Close(OutFile);
    Close(InFile);
    Halt(2)
  end;
  ReWrite(OutFile);
  if IOResult <> 0 then
  begin
    WriteLn('Error: could not create ', ParamStr(2));
    Close(InFile);
    Halt(3)
  end;

  { From here on every way out (normal end, Halt, runtime error) runs Stop,
    which closes both files. }
  SaveExit := ExitProc;
  ExitProc := @Stop;

  { Pass 1: count the lines so that progress can be shown during pass 2. }
  while not Eof(InFile) do
  begin
    ReadLn(InFile);
    Inc(NumLines)
  end;
  Writeln('There are ',NumLines,' lines in this File.'#13#10);
  Writeln('Press any key to stop the search For duplicate lines');

  { Number of lines per progress mark, rounded up: this gives at most 80
    marks and is never zero while there are lines to process (with zero
    lines the loop below is not entered, so the mod is never evaluated). }
  NmLdiv80 := (NumLines + 79) div 80;
  Root := nil;
  Reset(InFile);

  { Pass 2: filter out the duplicates. }
  while CurrentL <> NumLines do
  begin
    if KeyPressed then
      Halt { calls Stop };
    Inc(CurrentL);
    if CurrentL mod NmLdiv80 = 0 then
      Write('#');
    ReadLn(InFile, TagLine);

    { Walk down the tree until TagLine is found (i = 0) or we fall off a
      leaf (Current = nil). Prev trails one node behind; it stays nil only
      when the tree is still empty. i starts non-zero so that an empty tree
      counts as "not found". }
    Prev := nil;
    Current := Root;
    i := 1;
    while (Current <> nil) and (i <> 0) do
    begin
      Prev := Current;
      i := CompStr(Current^.Info, TagLine);
      if i > 0 then
        Current := Current^.left
      else
        if i < 0 then
          Current := Current^.right
    end;

    if i <> 0 then { TagLine not found: first occurrence }
    begin
      { Check the heap first so that running out of memory ends with a
        message instead of a runtime error; Stop still closes the files,
        so the lines written so far are kept. }
      if MaxAvail < SizeOf(TBinTree) then
      begin
        Writeln(#13#10'Error: out of memory after ', CurrentL, ' lines');
        Halt(4)
      end;
      New(Current);
      Current^.left := nil;
      Current^.right := nil;
      Current^.Info := TagLine;
      if Prev = nil then
        Root := Current            { very first TagLine }
      else
        if i > 0 then
          Prev^.left := Current    { Current before Prev }
        else
          Prev^.right := Current   { Current after Prev };
      Writeln(OutFile, TagLine)
    end
  end;
  Writeln(#13#10'100% Completed, result is in File ',ParamStr(2))
  { close is done by Stop }
end.
{
> I also tried DJ's idea of the buffer of 65535 but it said the structure
> was too large. So I used 64512.
Always try to use a multiple of 4K, because the hard disk 'eats' space in these
chunks. Reading/Writing in these chunks goes a lot faster that way.
}
[Back to FINDREPL SWAG index] [Back to Main SWAG index] [Original]