Thanks for the quick help! I've slightly modified your example, so it compiles for me: Replace the NativeUInt fields in TUserData with Int64. Otherwise I get the error '[DCC Error] DIXml_SAX2_fMain.pas(791): F2084 Internal Error: C1264', which does not tell me anything. Just a nitpick, replace startElementNs with endElementNs in
Memo.Lines.Add(Format('EndTagsNS: %8d', [User.startElementNs])); if you want to provide the modified demo.For testing I modified your demo further and store the character data found in a buffer and process it when an end tag is detected. However, this fails for the attributes. I've set a breakpoint at the start of SaxAttributeDecl, but it is never encountered. Any advice? I've attached the hacked sources for your convenience.
Thanks, Torsten
{ DIXml demo to read an XML document using the SAX2 functions. For each XML Visit the DIXml Internet site for latest information and updates: http://www.yunqa.de/delphi/ Copyright (c) 2007-2012 Ralf Junker, The Delphi Inspiration <delphi@xxxxxxxx> ------------------------------------------------------------------------------ } unit DIXml_SAX2_fMain; {$I DICompilers.inc} interface uses DISystemCompat, {$IFDEF HAS_UNITSCOPE} System.Classes, Vcl.Controls, Vcl.Forms, Vcl.StdCtrls, Vcl.ExtCtrls, Vcl.ComCtrls, {$ELSE HAS_UNITSCOPE} Classes, Controls, Forms, StdCtrls, ExtCtrls, ComCtrls, {$ENDIF HAS_UNITSCOPE} DIXml; type TfrmNodeTree = class(TForm) PageControl: TPageControl; txOutput: TTabSheet; tsErrors: TTabSheet; memoErrors: TMemo; pnlTop: TPanel; btnLoad: TButton; edtUrl: TEdit; lblURL: TLabel; btnUrl: TButton; Memo: TMemo; cbxPerformance: TCheckBox; procedure btnLoadClick(Sender: TObject); procedure FormCreate(Sender: TObject); procedure btnUrlClick(Sender: TObject); procedure FormDestroy(Sender: TObject); private FSaxHandler: xmlSAXHandler; protected procedure ParseUrl(const AFileName: string); end; var frmNodeTree: TfrmNodeTree; langBuffer, buffer: string; const APP_TITLE = 'DIXml Demo: SAX2'; implementation uses {$IFDEF HAS_UNITSCOPE} Winapi.Windows, System.SysUtils, System.IniFiles, Vcl.Dialogs {$ELSE HAS_UNITSCOPE} Windows, SysUtils, IniFiles, Dialogs {$ENDIF HAS_UNITSCOPE} ; {$R *.dfm} //------------------------------------------------------------------------------ function FileNameIsHtml(const AFileName: string): Boolean; var Ext: string; begin Ext := ExtractFileExt(AFileName); Result := (CompareText(Ext, '.htm') = 0) or (CompareText(Ext, '.html') = 0); end; //------------------------------------------------------------------------------ // SAX handler callbacks //------------------------------------------------------------------------------ type TUserData = record Strings: TStrings; startElement: Int64; startElementNs: Int64; endElement: Int64; endElementNs: Int64; // startElement: NativeUInt; // startElementNs: NativeUInt; // endElement: NativeUInt; // endElementNs: NativeUInt; end; PUserData = ^TUserData; procedure SaxStartDocument(Ctx: Pointer); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('StartDocument'); end; //------------------------------------------------------------------------------ procedure SaxEndDocument(Ctx: Pointer); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('EndDocument'); end; //------------------------------------------------------------------------------ function SaxIsStandalone(Ctx: Pointer): Integer; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('IsStandalone'); Result := 0; end; //------------------------------------------------------------------------------ function SaxHasInternalSubset( Ctx: Pointer): Integer; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('HasInternalSubset'); Result := 0; end; //------------------------------------------------------------------------------ function SaxHasExternalSubset( Ctx: Pointer): Integer; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('HasExternalSubset'); Result := 0; end; //------------------------------------------------------------------------------ function SaxResolveEntity( Ctx: Pointer; const PublicID: xmlCharPtr; const SystemID: xmlCharPtr): xmlParserInputPtr; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('ResolveEntity'); Result := nil; end; //------------------------------------------------------------------------------ procedure SaxNotationDecl( Ctx: Pointer; const Name: xmlCharPtr; const PublicID: xmlCharPtr; const SystemID: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'NotationDecl: "' + Utf8ToString(Name) + '" "' + Utf8ToString(PublicID) + '" "' + Utf8ToString(SystemID) + '"'); end; //------------------------------------------------------------------------------ procedure SaxProcessingInstruction( Ctx: Pointer; const Target: xmlCharPtr; const Data: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'XML PI: "' + Utf8ToString(Target) + '" "' + Utf8ToString(Data) + '"'); end; //------------------------------------------------------------------------------ procedure SaxComment( Ctx: Pointer; const Value: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('Comment: "' + Value + '"'); end; //------------------------------------------------------------------------------ procedure SaxCDataBlock( Ctx: Pointer; const Value: xmlCharPtr; Len: Integer); var s: Utf8String; User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin SetString(s, Value, Len); User^.Strings.Add('CDataBlock: "' + Utf8ToString(s) + '"'); end; end; //------------------------------------------------------------------------------ { The StartElement and EndElement callbacks are triggered for each start or end element, respectively. For both, the name parameter is the name of the element. The attrs parameter contains the attributes for the start tag. The even indices in the array will be attribute names, the odd indices are the values, and the final index will contain nil. } procedure SaxStartElement( Ctx: Pointer; const Name: xmlCharPtr; const atts: xmlCharPtrArrayPtr); var i: Integer; AttrName, AttrValue: xmlCharPtr; User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin User^.Strings.Add('Start Element: "' + Utf8ToString(Name) + '"'); if Assigned(atts) then begin i := 0; AttrName := atts[i]; while Assigned(AttrName) do begin AttrValue := atts[i + 1]; User^.Strings.Add( ' * Attribute: "' + Utf8ToString(AttrName) + '" "' + Utf8ToString(AttrValue) + '"'); Inc(i, 2); AttrName := atts[i]; end; end; end; Inc(User^.startElement) end; //------------------------------------------------------------------------------ procedure SaxEndElement( Ctx: Pointer; const Name: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('End Element: "' + Name + '"'); Inc(User^.endElement); end; //------------------------------------------------------------------------------ procedure SaxIgnorableWhitespace( Ctx: Pointer; const Ch: xmlCharPtr; Len: Integer); var s: Utf8String; User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin SetString(s, Ch, Len); User^.Strings.Add('Ignorable Whitespace: "' + Utf8ToString(s) + '"'); end; end; //------------------------------------------------------------------------------ { The characters callback is called when there are characters that are outside of tags get parsed. The xmlCharPtr type is used in places where DIXml expects to receive, or provides valid UTF-8 data. In most cases, you can think of ch as a normal character string, although for correct processing you will need to use proper UTF-8 decoding / encoding. Note that the character data is not necessarily zero-terminated. This is so that DIXml does not need to copy the character data out of its internal buffers before passing it to the callback. In your callback, you will probably want to copy the characters to some other buffer so that it can be used from the endElement callback. To optimise this callback a bit, you might adjust the callback so that it only copies the characters if the parser is in a certain state. Note that the characters callback may be called more than once between calls to startElement and endElement. } procedure SaxCharacters( Ctx: Pointer; const Ch: xmlCharPtr; Len: Integer); var s: Utf8String; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin SetString(s, Ch, Len); User^.Strings.Add('Characters: "' + Utf8ToString(s) + '"'); end; SetString(s, Ch, Len); // frmNodeTree.Memo.Lines.Add('Characters: ' + Utf8ToString(s)); buffer := buffer + Utf8ToString(s); end; //------------------------------------------------------------------------------ { The GetEntities callback is invoked to handle entities in the SAX interface. The xmlEntityPtr structure holds some information about the entity. This structure will not be freed by the parser, so it makes sense to create these structures once, and then pass a pointer to the appropriate one when this function is called. After calling getEntity, the expansion of the entity is passed to the characters callback. This way, you do not need to worry about decoding entities anywhere else in your callback routines. If your XML document only contains the standard entities (<, >, ', " and &), then it is possible to use the following very short implementation for this callback: } function SaxGetEntity( Ctx: Pointer; const Name: xmlCharPtr): xmlEntityPtr; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin User^.Strings.Add('GetEntity: "' + Name + '"'); end; Result := xmlGetPredefinedEntity(Name); end; //------------------------------------------------------------------------------ function SaxGetParameterEntity( Ctx: Pointer; const Name: xmlCharPtr): xmlEntityPtr; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('GetParam.Entity: "' + Name + '"'); Result := nil; end; //------------------------------------------------------------------------------ procedure SaxElementDecl( Ctx: Pointer; const Name: xmlCharPtr; type_: Integer; Content: xmlElementContentPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('ElementDecl: "' + Name + '"'); end; //------------------------------------------------------------------------------ procedure SaxUnparsedEntityDecl( Ctx: Pointer; const Name: xmlCharPtr; const PublicID: xmlCharPtr; const SystemID: xmlCharPtr; const NotationName: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'SaxUnparsedEntityDecl: "' + Utf8ToString(Name) + '" "' + Utf8ToString(PublicID) + '" "' + Utf8ToString(SystemID) + '" "' + Utf8ToString(NotationName) + '"'); end; //------------------------------------------------------------------------------ procedure SaxDocumentLocator( Ctx: Pointer; loc: xmlSAXLocatorPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('SaxDocumentLocator'); end; //------------------------------------------------------------------------------ procedure SaxInternalSubset( Ctx: Pointer; const Name: xmlCharPtr; const ExternalID: xmlCharPtr; const SystemID: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'Int. Subset: "' + Utf8ToString(Name) + '" "' + Utf8ToString(ExternalID) + '" "' + Utf8ToString(SystemID) + '"'); end; //------------------------------------------------------------------------------ procedure SaxExternalSubset( Ctx: Pointer; const Name: xmlCharPtr; const ExternalID: xmlCharPtr; const SystemID: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'Ext. Subset: "' + Utf8ToString(Name) + '" "' + Utf8ToString(ExternalID) + '" "' + Utf8ToString(SystemID) + '"'); end; //------------------------------------------------------------------------------ procedure SaxStartElementNS( Ctx: Pointer; const LocalName: xmlCharPtr; const Prefix: xmlCharPtr; const URI: xmlCharPtr; nb_namespaces: Integer; const NameSpaces: xmlCharPtrArrayPtr; nb_attributes: Integer; nb_defaulted: Integer; const Attributes: xmlCharPtrArrayPtr); var i, n: Integer; AttrName, AttrPrefix, AttrUri, AttrValueStart, AttrValueEnd: xmlCharPtr; AttrValue: Utf8String; NsPrefix, NsUri: xmlCharPtr; var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin User^.Strings.Add( 'StartElementNS: "' + Utf8ToString(LocalName) + '" "' + Utf8ToString(Prefix) + '" "' + Utf8ToString(URI) + '"'); { NameSpaces. } i := 0; for n := 0 to nb_namespaces - 1 do begin NsPrefix := NameSpaces[i]; NsUri := NameSpaces[i + 1]; User^.Strings.Add( ' * NameSpace: "' + Utf8ToString(NsPrefix) + '" "' + Utf8ToString(NsUri) + '"'); Inc(i, 2); end; { Attributes. } i := 0; for n := 0 to nb_attributes - 1 do begin AttrName := Attributes[i]; AttrPrefix := Attributes[i + 1]; AttrUri := Attributes[i + 2]; AttrValueStart := Attributes[i + 3]; AttrValueEnd := Attributes[i + 4]; SetString(AttrValue, AttrValueStart, AttrValueEnd - AttrValueStart); User^.Strings.Add( ' * Attribute: "' + Utf8ToString(AttrName) + '" "' + Utf8ToString(AttrPrefix) + '" "' + Utf8ToString(AttrUri) + '" "' + Utf8ToString(AttrValue) + '"'); frmNodeTree.Memo.Lines.Add('Attr: ' + (Utf8ToString(AttrName))); Inc(i, 5); end; end; Inc(User^.startElementNs); end; //------------------------------------------------------------------------------ procedure SaxEndElementNS( Ctx: Pointer; const LocalName: xmlCharPtr; const Prefix: xmlCharPtr; const URI: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'EndElementNS: "' + Utf8ToString(LocalName) + '" "' + Utf8ToString(Prefix) + '" "' + Utf8ToString(URI) + '"'); frmNodeTree.Memo.Lines.Add('Found: ' + LocalName); if trim(buffer) <> '' then frmNodeTree.Memo.Lines.Add('Trimmed Buffer: (' + langBuffer + ') ' + trim(buffer)); buffer := ''; langBuffer := 'none'; Inc(User^.endElementNs) end; //------------------------------------------------------------------------------ procedure SaxAttributeDecl( Ctx: Pointer; const elem: xmlCharPtr; const FullName: xmlCharPtr; type_: Integer; Def: Integer; const DefaultValue: xmlCharPtr; Tree: xmlEnumerationPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'Attrib.Decl.: "' + Utf8ToString(elem) + '" "' + Utf8ToString(FullName) + '" "' + Utf8ToString(DefaultValue) + '"'); langBuffer := Utf8ToString(DefaultValue); end; //------------------------------------------------------------------------------ procedure SaxEntityDecl( Ctx: Pointer; const Name: xmlCharPtr; type_: Integer; const PublicID: xmlCharPtr; const SystemID: xmlCharPtr; Content: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add( 'EntityDecl: "' + Utf8ToString(Name) + '" "' + Utf8ToString(PublicID) + '" "' + Utf8ToString(SystemID) + '" "' + Utf8ToString(Content) + '"'); end; //------------------------------------------------------------------------------ procedure SaxReference( Ctx: Pointer; const Name: xmlCharPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then User^.Strings.Add('Reference: "' + Name + '"'); end; //------------------------------------------------------------------------------ { This is the error callback. } procedure SaxStructuredError(Ctx: Pointer; Error: xmlErrorPtr); var User: PUserData; begin User := Ctx; if Assigned(User^.Strings) then begin User^.Strings.Add(Utf8ToString(Error^.Message) + ' in file ' + Utf8ToString(Error^.File_) + ' line ' + IntToStr(Error^.Line) + ' column ' + IntToStr(Error^.int2)); if Assigned(Error^.Str1) then User^.Strings.Add(Utf8ToString(Error^.Str1)); if Assigned(Error^.Str2) then User^.Strings.Add(Utf8ToString(Error^.Str2)); if Assigned(Error^.str3) then User^.Strings.Add(Utf8ToString(Error^.str3)); end; end; //------------------------------------------------------------------------------ procedure TfrmNodeTree.FormCreate(Sender: TObject); var dim: string; begin langBuffer := 'none'; buffer := ''; Caption := APP_TITLE; tsErrors.TabVisible := False; // Hide errors tab for now. { Read form position. } with TIniFile.Create(ChangeFileExt(ParamStr(0), '.ini')) do try dim := IntToStr(Screen.Width) + 'x' + IntToStr(Screen.Height); SetBounds( ReadInteger(dim, 'Left', Left), ReadInteger(dim, 'Top', Top), ReadInteger(dim, 'Width', Width), ReadInteger(dim, 'Height', Height)); edtUrl.Text := ReadString('Options', 'URL', edtUrl.Text); finally Free; end; xmlInitParser; // Initialize the XML library. { Initialize the SAX2 handler. To start off with, we set all functions to nil. If we use a nil SAX parser like this, then we will have a parser that only checks that the document is well formed. } FillChar(FSaxHandler, SizeOf(FSaxHandler), 0); { By adding a few callbacks to the SAX handler, we can get the parser to do just about anything. } FSaxHandler.internalSubset := SaxInternalSubset; FSaxHandler.isStandalone := SaxIsStandalone; FSaxHandler.hasInternalSubset := SaxHasInternalSubset; FSaxHandler.hasExternalSubset := SaxHasExternalSubset; FSaxHandler.resolveEntity := SaxResolveEntity; FSaxHandler.GetEntity := SaxGetEntity; FSaxHandler.entityDecl := SaxEntityDecl; FSaxHandler.notationDecl := SaxNotationDecl; FSaxHandler.attributeDecl := SaxAttributeDecl; FSaxHandler.elementDecl := SaxElementDecl; FSaxHandler.unparsedEntityDecl := SaxUnparsedEntityDecl; FSaxHandler.setDocumentLocator := SaxDocumentLocator; FSaxHandler.startDocument := SaxStartDocument; FSaxHandler.endDocument := SaxEndDocument; FSaxHandler.startElement := SaxStartElement; FSaxHandler.endElement := SaxEndElement; FSaxHandler.reference := SaxReference; FSaxHandler.characters := SaxCharacters; FSaxHandler.ignorableWhitespace := SaxIgnorableWhitespace; FSaxHandler.processingInstruction := SaxProcessingInstruction; FSaxHandler.Comment := SaxComment; FSaxHandler.Warning := nil; // We use StructuredError below. FSaxHandler.Error := nil; // We use StructuredError below. FSaxHandler.fatalError := nil; // Unused FSaxHandler.getParameterEntity := SaxGetParameterEntity; FSaxHandler.cdataBlock := SaxCDataBlock; FSaxHandler.externalSubset := SaxExternalSubset; { Let the parser know that we have successfully initialized this SAX2 handler. It checks for this magic before it triggers the structure error callback. } FSaxHandler.Initialized := XML_SAX2_MAGIC; FSaxHandler.private_ := nil; FSaxHandler.startElementNs := SaxStartElementNS; FSaxHandler.endElementNs := SaxEndElementNS; FSaxHandler.serror := SaxStructuredError; end; //------------------------------------------------------------------------------ procedure TfrmNodeTree.FormDestroy(Sender: TObject); var dim: string; r: TRect; WinPlacement: TWindowPlacement; begin xmlCleanupParser; // Clear global XML library variables. { Save form position. } WinPlacement.Length := SizeOf(WinPlacement); GetWindowPlacement(Self.Handle, @WinPlacement); r := WinPlacement.rcNormalPosition; with TIniFile.Create(ChangeFileExt(ParamStr(0), '.ini')) do try dim := IntToStr(Screen.Width) + 'x' + IntToStr(Screen.Height); WriteInteger(dim, 'Left', r.Left); WriteInteger(dim, 'Top', r.Top); WriteInteger(dim, 'Width', r.Right - r.Left); WriteInteger(dim, 'Height', r.Bottom - r.Top); WriteString('Options', 'URL', edtUrl.Text); finally Free; end; end; //------------------------------------------------------------------------------ procedure TfrmNodeTree.ParseUrl(const AFileName: string); var f8: Utf8String; User: TUserData; tc: Cardinal; begin tsErrors.TabVisible := False; memoErrors.Lines.BeginUpdate; try memoErrors.Clear; Memo.Lines.BeginUpdate; try Memo.Clear; f8 := Utf8Encode(AFileName); if cbxPerformance.Checked then User.Strings := nil else User.Strings := Memo.Lines; User.startElement := 0; User.startElementNs := 0; User.endElement := 0; User.endElementNs := 0; tc := GetTickCount; xmlSAXUserParseFile(@FSaxHandler, @User, PAnsiChar(f8)); if not Assigned(User.Strings) then begin tc := GetTickCount - tc; Memo.Lines.Add(Format('Parsed in %d milliseconds.', [tc])); Memo.Lines.Add(''); Memo.Lines.Add('Document statistics:'); Memo.Lines.Add(''); Memo.Lines.Add(Format('StartTag: %8d', [User.startElement])); Memo.Lines.Add(Format('StartTagNS: %8d', [User.startElementNs])); Memo.Lines.Add(Format('EndTag: %8d', [User.endElement])); Memo.Lines.Add(Format('EndTagsNS: %8d', [User.endElementNs])); end; finally Memo.Lines.EndUpdate; end; if memoErrors.Lines.Count > 0 then tsErrors.TabVisible := True; finally memoErrors.Lines.EndUpdate; end; end; //------------------------------------------------------------------------------ procedure TfrmNodeTree.btnLoadClick(Sender: TObject); begin ParseUrl(edtUrl.Text); end; //------------------------------------------------------------------------------ procedure TfrmNodeTree.btnUrlClick(Sender: TObject); begin with TOpenDialog.Create(nil) do try Filter := 'Supported file (*.xml;*.xsl;*.htm;*.html)|*.xml;*.xsl;*.htm;*.html|' + 'XML file (*.xml)|*.xml|' + 'XSL file (*.xsl)|*.xsl|' + 'HTML file (*.htm;*.html)|*.htm;*.html|' + 'Any file (*.*)|*.*'; Options := Options + [ofEnableSizing, ofFileMustExist, ofPathMustExist]; if Execute then begin edtUrl.Text := FileName; end; finally Free; end; end; initialization // xmlKeepBlanksDefault(0); end.