我有一个程序在输出完成时写入输出,一个特定的文件需要非常大的时间,我想知道是否可以做一些改进它的速度.
该文件最终为25 mbs或更多
它有大约17000行,每行有大约500字段
它的工作原理是:
procedure CWaitList.WriteData(AFile : string; AReplicat : integer; AllFields : Boolean); var fout : TextFile; idx,ndx : integer; MyPat : CPatientItem; begin ndx := FList.Count - 1; AssignFile(fout,AFile); Append(fout); for idx := 0 to ndx do begin MyPat := CPatientItem(FList.Objects[idx]); if not Assigned(MyPat) then Continue; MyPat.WriteItem(fout,AReplicat,AllFields); end; CloseFile(fout); end;
WriteItem是一个从MyPat获取所有值并将其写入文件的过程,并且还调用了另外3个也向文件写入值的函数
所以总的来说,WriteData循环最终大约在1700,每行最后有500个字段
我只是想知道有没有什么可以做的,以提高其性能,或者它总是需要很长时间,因为它有多少数据要写
谢谢
解决方法
加快TextFile的正确方法是使用SetTextBuf.也可以在所有文件访问中添加{$I-} …. {$I}.
var TmpBuf: array[word] of byte; .. {$I-} AssignFile(fout,AFile); Append(fout); SetTextBuf(fOut,TmpBuf); for idx := 0 to ndx do begin MyPat := CPatientItem(FList.Objects[idx]); if not Assigned(MyPat) then Continue; MyPat.WriteItem(fout,AllFields); end; if ioresult<>0 then ShowMessage('Error writing file'); CloseFile(fout); {$I+} end;
在所有情况下,旧文件API现在都不会被使用…
{$I-} …. {$I}也将围绕添加内容到文本文件的所有子例程.
我有一些关于巨大文本文件和缓冲区创建的实验.我在开源SynCommons单元中写了一个专门的类,名为TTextWriter,它是面向UTF-8的.我特别用于JSON生产或LOG writing以最高的速度.它避免大多数临时堆分配(例如从整数值转换),因此它在多线程缩放中甚至非常好.一些高级方法可用于从开放数组格式化一些文本,如format()函数,但要快得多.
这是这个类的接口:
/// simple writer to a Stream,specialized for the TEXT format // - use an internal buffer,faster than string+string // - some dedicated methods is able to encode any data with JSON escape TTextWriter = class protected B,BEnd: PUTF8Char; fStream: TStream; fInitialStreamPosition: integer; fStreamIsOwned: boolean; // internal temporary buffer fTempBufSize: Integer; fTempBuf: PUTF8Char; // [0..4] for 'u0001' four-hex-digits template,[5..7] for one UTF-8 char BufUnicode: array[0..7] of AnsiChar; /// flush and go to next char function FlushInc: PUTF8Char; function GetLength: integer; public /// the data will be written to the specified Stream // - aStream may be nil: in this case,it MUST be set before using any // Add*() method constructor Create(aStream: TStream; aBufSize: integer=1024); /// the data will be written to an internal TMemoryStream constructor CreateOwnedStream; /// release fStream is is owned destructor Destroy; override; /// retrieve the data as a string // - only works if the associated Stream Inherits from TMemoryStream: return // '' if it is not the case function Text: RawUTF8; /// write pending data to the Stream procedure Flush; /// append one char to the buffer procedure Add(c: AnsiChar); overload; {$ifdef HASINLINE}inline;{$endif} /// append two chars to the buffer procedure Add(c1,c2: AnsiChar); overload; {$ifdef HASINLINE}inline;{$endif} /// append an Integer Value as a String procedure Add(Value: Int64); overload; /// append an Integer Value as a String procedure Add(Value: integer); overload; /// append a Currency from its Int64 in-memory representation procedure AddCurr64(Value: PInt64); overload; /// append a Currency from its Int64 in-memory representation procedure AddCurr64(const Value: Int64); overload; /// append a TTimeLog value,expanded as Iso-8601 encoded text procedure AddTimeLog(Value: PInt64); /// append a TDateTime value,expanded as Iso-8601 encoded text procedure AddDateTime(Value: PDateTime); overload; /// append a TDateTime value,expanded as Iso-8601 encoded text procedure AddDateTime(const Value: TDateTime); overload; /// append an Unsigned Integer Value as a String procedure AddU(Value: cardinal); /// append a floating-point Value as a String // - double precision with max 3 decimals is default here,to avoid rounding // problems procedure Add(Value: double; decimals: integer=3); overload; /// append strings or integers with a specified format // - % = #37 indicates a string,integer,floating-point,or class parameter // to be appended as text (e.g. class name) // - $= #36 indicates an integer to be written with 2 digits and a comma // - £ = #163 indicates an integer to be written with 4 digits and a comma // - µ = #181 indicates an integer to be written with 3 digits without any comma // - ¤ = #164 indicates CR+LF chars // - CR = #13 indicates CR+LF chars // - § = #167 indicates to trim last comma // - since some of this characters above are > #127,they are not UTF-8 // ready,so we expect the input format to be WinAnsi,i.e. mostly English // text (with chars < #128) with some values to be inserted inside // - if StringEscape is false (by default),the text won't be escaped before // adding; but if set to true text will be JSON escaped at writing procedure Add(Format: PWinAnsiChar; const Values: array of const; Escape: TTextWriterKind=twNone); overload; /// append CR+LF chars procedure AddCR; {$ifdef HASINLINE}inline;{$endif} /// write the same character multiple times procedure AddChars(aChar: AnsiChar; aCount: integer); /// append an Integer Value as a 2 digits String with comma procedure Add2(Value: integer); /// append the current date and time,in a log-friendly format // - e.g. append '20110325 19241502 ' // - this method is very fast,and avoid most calculation or API calls procedure AddCurrentLogTime; /// append an Integer Value as a 4 digits String with comma procedure Add4(Value: integer); /// append an Integer Value as a 3 digits String without any added comma procedure Add3(Value: integer); /// append a line of text with CR+LF at the end procedure AddLine(const Text: shortstring); /// append a String procedure AddString(const Text: RawUTF8); {$ifdef HASINLINE}inline;{$endif} /// append a ShortString procedure AddShort(const Text: ShortString); {$ifdef HASINLINE}inline;{$endif} /// append a ShortString property name,as '"PropName":' procedure AddPropName(const PropName: ShortString); /// append an Instance name and pointer,as '"TObjectList(00425E68)"'+SepChar // - Instance must be not nil procedure AddInstanceName(Instance: TObject; SepChar: AnsiChar); /// append an Instance name and pointer,as 'TObjectList(00425E68)'+SepChar // - Instance must be not nil procedure AddInstancePointer(Instance: TObject; SepChar: AnsiChar); /// append an array of integers as CSV procedure AddCSV(const Integers: array of Integer); overload; /// append an array of doubles as CSV procedure AddCSV(const Doubles: array of double; decimals: integer); overload; /// append an array of RawUTF8 as CSV procedure AddCSV(const Values: array of RawUTF8); overload; /// write some data as hexa chars procedure WrHex(P: PAnsiChar; Len: integer); /// write some data Base64 encoded // - if withMagic is TRUE,will write as '"\uFFF0base64encodedbinary"' procedure WrBase64(P: PAnsiChar; Len: cardinal; withMagic: boolean); /// write some #0 ended UTF-8 text,according to the specified format procedure Add(P: PUTF8Char; Escape: TTextWriterKind); overload; /// write some #0 ended UTF-8 text,according to the specified format procedure Add(P: PUTF8Char; Len: PtrInt; Escape: TTextWriterKind); overload; /// write some #0 ended Unicode text as UTF-8,according to the specified format procedure AddW(P: PWord; Len: PtrInt; Escape: TTextWriterKind); overload; /// append some chars to the buffer // - if Len is 0,Len is calculated from zero-ended char // - don't escapes chars according to the JSON RFC procedure AddNoJSONEscape(P: Pointer; Len: integer=0); /// append some binary data as hexadecimal text conversion procedure AddBinToHex(P: Pointer; Len: integer); /// fast conversion from binary data into hexa chars,ready to be displayed // - using this function with Bin^ as an integer value will encode it // in big-endian order (most-signignifican byte first): use it for display // - up to 128 bytes may be converted procedure AddBinToHexDisplay(Bin: pointer; BinBytes: integer); /// add the pointer into hexa chars,ready to be displayed procedure AddPointer(P: PtrUInt); /// append some unicode chars to the buffer // - WideCharCount is the unicode chars count,not the byte size // - don't escapes chars according to the JSON RFC // - will convert the Unicode chars into UTF-8 procedure AddNoJSONEscapeW(P: PWord; WideCharCount: integer); /// append some UTF-8 encoded chars to the buffer // - if Len is 0,Len is calculated from zero-ended char // - escapes chars according to the JSON RFC procedure AddJSONEscape(P: Pointer; Len: PtrInt=0); overload; /// append some UTF-8 encoded chars to the buffer,from a generic string type // - faster than AddJSONEscape(pointer(StringToUTF8(string)) // - if Len is 0,Len is calculated from zero-ended char // - escapes chars according to the JSON RFC procedure AddJSONEscapeString(const s: string); {$ifdef UNICODE}inline;{$endif} /// append some Unicode encoded chars to the buffer // - if Len is 0,Len is calculated from zero-ended widechar // - escapes chars according to the JSON RFC procedure AddJSONEscapeW(P: PWord; Len: PtrInt=0); /// append an open array constant value to the buffer // - "" will be added if necessary // - escapes chars according to the JSON RFC // - very fast (avoid most temporary storage) procedure AddJSONEscape(const V: TVarRec); overload; /// append a dynamic array content as UTF-8 encoded JSON array // - expect a dynamic array TDynArray wrapper as incoming parameter // - TIntegerDynArray,TInt64DynArray,TCardinalDynArray,TDoubleDynArray,// TCurrencyDynArray,TWordDynArray and TByteDynArray will be written as // numerical JSON values // - TRawUTF8DynArray,TWinAnsiDynArray,TRawByteStringDynArray,// TStringDynArray,TWideStringDynArray,TSynUnicodeDynArray,TTimeLogDynArray,// and TDateTimeDynArray will be written as escaped UTF-8 JSON strings // (and Iso-8601 textual encoding if necessary) // - any other kind of dynamic array (including array of records) will be // written as Base64 encoded binary stream,with a JSON_BASE64_MAGIC prefix // (UTF-8 encoded \uFFF0 special code) // - examples: '[1,2,3,4]' or '["\uFFF0base64encodedbinary"]' procedure AddDynArrayJSON(const DynArray: TDynArray); /// append some chars to the buffer in one line // - P should be ended with a #0 // - will write #1..#31 chars as spaces (so content will stay on the same line) procedure AddOnSameLine(P: PUTF8Char); overload; /// append some chars to the buffer in one line // - will write #0..#31 chars as spaces (so content will stay on the same line) procedure AddOnSameLine(P: PUTF8Char; Len: PtrInt); overload; /// append some wide chars to the buffer in one line // - will write #0..#31 chars as spaces (so content will stay on the same line) procedure AddOnSameLineW(P: PWord; Len: PtrInt); /// serialize as JSON the given object // - this default implementation will write null,or only write the // class name and pointer if FullExpand is true - use TJSONSerializer. // WriteObject method for full RTTI handling // - default implementation will write TList/TCollection/TStrings/TRawUTF8List // as appropriate array of class name/pointer (if FullExpand=true) or string procedure WriteObject(Value: TObject; HumanReadable: boolean=false; DontStoreDefault: boolean=true; FullExpand: boolean=false); virtual; /// the last char appended is canceled procedure CancelLastChar; {$ifdef HASINLINE}inline;{$endif} /// the last char appended is canceled if it was a ',' procedure CancelLastComma; {$ifdef HASINLINE}inline;{$endif} /// rewind the Stream to the position when Create() was called procedure CancelAll; /// count of add byte to the stream property TextLength: integer read GetLength; /// the internal TStream used for storage property Stream: TStream read fStream write fStream; end;
您可以看到,甚至有一些序列化可用,并且CancelLastComma / CancelLastChar方法对于从循环中生成快速的JSON或CSV数据非常有用.
关于速度和时间,这个例程比我的磁盘访问速度快,大约是100 MB / s.我认为在TMemoryStream而不是TFileStream中附加数据时可以实现大约500 MB / s.