1 |
{ Copyright (C) 2009 Bas Steendijk and Peter Green
|
2 |
For conditions of distribution and use, see copyright notice in zlib_license.txt
|
3 |
which is included in the package
|
4 |
----------------------------------------------------------------------------- }
|
5 |
|
6 |
unit readtxt2;
|
7 |
|
8 |
interface
|
9 |
|
10 |
{
readtxt, version 2
by beware

this can be used to read a text file, exposed as a tstream, line by line.
automatic handling of CR, LF, and CRLF line endings, and readout of the detected line ending type.
fast: 1.5-2 times faster than textfile readln in tests.
}
|
18 |
|
19 |
uses
|
20 |
classes,sysutils;
|
21 |
|
22 |
const
  {size in bytes of the internal read buffer of treadtxt}
  bufsize=4096;

  {line ending types. treadtxt.allowedeol selects which of these readline
   accepts, treadtxt.detectedeol reports which one was seen}
  eoltype_none=0;            {detectedeol: no line ending seen yet}
  eoltype_any=eoltype_none;  {allowedeol: CR, LF and CRLF are all accepted}
  eoltype_cr=1;              {a single CR (13)}
  eoltype_lf=2;              {a single LF (10)}
  eoltype_crlf=3;            {CR immediately followed by LF}
|
29 |
|
30 |
type
|
31 |
{line by line reader on top of a tstream. see readline and eof}
treadtxt=class(tobject)
private
  {read buffer, filled from sourcestream by checkandread}
  buf:array[0..bufsize-1] of byte;
  {number of bytes the last read placed in buf}
  numread:integer;
  {index in buf of the next byte readline has not consumed yet}
  bufpointer:integer;
  {byte value (10 or 13) of the line ending character readline found last}
  currenteol:integer;
  {value currenteol had before the last line ending was found}
  preveol:integer;
  {set once a read from sourcestream returned no data}
  fileeof:boolean;
  {set by readline when it returned the final data of the stream}
  reachedeof:boolean;
  {backing field of detectedeol}
  fdetectedeol:integer;
  {refills buf from sourcestream when all buffered bytes are consumed}
  procedure checkandread;
public
  {stream the lines are read from}
  sourcestream:tstream;
  {when true, sourcestream is destroyed together with this object}
  destroysourcestream:boolean;
  {one of the eoltype_ constants: which line endings readline accepts.
   a new object starts with 0, which is eoltype_any}
  allowedeol:integer;

  {read from an existing stream}
  constructor create(asourcestream: tstream; adestroysourcestream:boolean);
  {open the named file and read from it; the file stream is owned by this object}
  constructor createf(filename : string);

  {returns the next line without its line ending}
  function readline:ansistring;
  {true when there is nothing left to read}
  function eof:boolean;
  destructor destroy; override;
  {one of the eoltype_ constants: line ending type seen so far,
   eoltype_none before the first line ending}
  property detectedeol : integer read fdetectedeol;
end;
|
52 |
|
53 |
implementation
|
54 |
|
55 |
{creates a reader on top of asourcestream.
 asourcestream:        stream to read lines from, starting at its current position
 adestroysourcestream: when true the destructor also destroys the stream}
constructor treadtxt.create(asourcestream: tstream; adestroysourcestream:boolean);
begin
  inherited create;
  destroysourcestream := adestroysourcestream;
  sourcestream := asourcestream;

  {the buffer is empty (numread is 0 on a new object), so any bufpointer at or
   beyond that makes the first checkandread fill it. end of stream is not
   probed here through position/size; it is noticed when a read returns no data}
  bufpointer := bufsize;
end;
|
64 |
|
65 |
{creates a reader on the file with the given name. the file is opened with
 fmOpenRead and fmShareDenyWrite, and the file stream is destroyed together
 with this object.
 an exception raised by tfilestream.create (for example when the file cannot
 be opened) is passed on to the caller}
constructor treadtxt.createf(filename: string);
var
  filestream:tfilestream;
begin
  filestream := tfilestream.create(filename,fmShareDenyWrite or fmOpenRead);
  create(filestream,true);
end;
|
69 |
|
70 |
|
71 |
{makes sure buf holds unread data if the stream can still deliver any.
 when every buffered byte has been consumed, reads up to bufsize bytes from
 sourcestream into buf and rewinds bufpointer to 0. a read that returns no
 data sets fileeof; fileeof is never cleared again here}
procedure treadtxt.checkandread;
begin
  {unread bytes left in the buffer: nothing to do}
  if bufpointer < numread then exit;

  numread := sourcestream.read(buf,bufsize);
  bufpointer := 0;
  if numread = 0 then fileeof := true;
end;
|
80 |
|
81 |
{reads the next line from sourcestream and returns it without its line ending.

 which bytes end a line depends on allowedeol:
   eoltype_any:  a CR, a LF, or a CR directly followed by a LF
   eoltype_cr:   CR only
   eoltype_lf:   LF only
   eoltype_crlf: only a LF directly preceded by a CR
 any other allowedeol value raises an exception as soon as a CR or LF byte is
 encountered.

 when the stream ends before a line ending is found, the remaining data
 (possibly an empty string) is returned and reachedeof is set.
 fdetectedeol is updated with the type of line ending that was found}
function treadtxt.readline:ansistring;
var
  scanpos,eolpos,ch:integer;
  oldlen,chunklen:integer;
  prevchar:integer;
  trimchar:boolean;
begin
  {prevchar lives across buffer refills so that a CRLF pair split over two
   buffers is still recognised in eoltype_crlf mode}
  prevchar := 0;
  result := '';
  repeat
    checkandread;
    trimchar := false;

    {core search loop begin}
    eolpos := -1;
    for scanpos := bufpointer to numread-1 do begin
      ch := buf[scanpos];
      //check if the character can possibly be a line ending before getting
      //into the more complex checks that depend on eol type
      if (ch = 10) or (ch = 13) then case allowedeol of
        eoltype_any: begin
          eolpos := scanpos;
          break;
        end;
        eoltype_cr: begin
          if ch = 13 then begin
            eolpos := scanpos;
            break;
          end;
        end;
        eoltype_lf: begin
          if ch = 10 then begin
            eolpos := scanpos;
            break;
          end;
        end;
        eoltype_crlf: begin
          if (ch = 10) and (prevchar = 13) then begin
            eolpos := scanpos;
            {the CR in front of this LF has to be removed from the line}
            trimchar := true;
            break;
          end;
        end;
      else begin
        raise exception.create('undefined eol type set');
      end;
      end;
      prevchar := ch;
    end;
    {core search loop end}

    oldlen := length(result);
    if eolpos = -1 then begin
      {ran out of buffer before end of line: append what is there}
      chunklen := numread-bufpointer;
      setlength(result,oldlen+chunklen);
      {at end of stream there is nothing to copy, and result[oldlen+1] would
       lie outside the string}
      if chunklen > 0 then move(buf[bufpointer],result[oldlen+1],chunklen);
      bufpointer := numread;
      if fileeof then begin
        {we reached the end of the file, return what we have}
        reachedeof := true;
        exit;
      end;
    end else begin
      preveol := currenteol;
      currenteol := buf[eolpos];

      {end of line before end of buffer}
      if (currenteol = 10) and (preveol = 13) and (bufpointer = eolpos) and (oldlen = 0) then begin
        {it's the second EOL char of a DOS line ending, don't cause a line.
         oldlen = 0 guarantees the LF directly follows the CR that ended the
         previous line; without it, a LF that happens to be the first byte of
         a refilled buffer would be swallowed even though this call already
         collected text, gluing two lines together}
        bufpointer := eolpos+1;
        fdetectedeol := eoltype_crlf;
      end else begin
        if trimchar then begin
          {trimchar is only set in eoltype_crlf mode, so this is a CRLF pair}
          fdetectedeol := eoltype_crlf;
        end else if fdetectedeol = eoltype_none then begin
          if currenteol = 10 then fdetectedeol := eoltype_lf else fdetectedeol := eoltype_cr;
        end;

        chunklen := eolpos-bufpointer;
        {in eoltype_crlf mode the CR is not part of the line. it is either the
         last byte of this chunk or, when the pair was split by a buffer
         refill, the last byte already appended to result. shortening by one
         removes it in both cases (chunklen is then -1 in the split case)}
        if trimchar then dec(chunklen);
        setlength(result,oldlen+chunklen);
        if chunklen > 0 then move(buf[bufpointer],result[oldlen+1],chunklen);
        bufpointer := eolpos+1;

        {EOF check. fileeof is only set after a read that returned no data, so
         this can only trigger when the stream delivered data again afterwards}
        if fileeof then begin
          if bufpointer >= numread then reachedeof := true;
          if (currenteol = 13) and (bufpointer = numread-1) then if buf[bufpointer] = 10 then reachedeof := true;
        end;

        exit;
      end;
    end;
  until false;
end;
|
180 |
|
181 |
{returns true when there is no more data to read.
 may read from sourcestream to find out (through checkandread)}
function treadtxt.eof:boolean;
begin
  checkandread;

  {in eoltype_any mode readline returns a line as soon as it sees a CR and
   leaves a directly following LF in the buffer to be skipped by the next
   readline. if that LF is the last byte of the stream, eof would report false
   and the next readline would return a spurious empty line. consume the LF
   here, with the same bookkeeping readline does when it skips it}
  if (allowedeol = eoltype_any) and (currenteol = 13) and (bufpointer < numread) then begin
    if buf[bufpointer] = 10 then begin
      preveol := currenteol;
      currenteol := 10;
      fdetectedeol := eoltype_crlf;
      inc(bufpointer);
      {the LF may have been the last buffered byte}
      checkandread;
    end;
  end;

  result := ((bufpointer >= numread) and fileeof) or reachedeof;
end;
|
186 |
|
187 |
{destroys the reader, and the source stream as well when
 destroysourcestream is set}
destructor treadtxt.destroy;
begin
  {free does nothing for a nil reference, otherwise it calls destroy}
  if destroysourcestream then sourcestream.free;
  inherited destroy;
end;
|
192 |
|
193 |
end.
|