[yunqa.de] In DIXml this is a bug about encoding?

  • From: w w <wangwei.njcn@xxxxxxxxx>
  • To: yunqa@xxxxxxxxxxxxx
  • Date: Thu, 20 Aug 2009 17:33:59 +0800

hi;
  xml =
    '<?xml version="1.0" encoding="GB2312"?>' + #$A +
    '<xml>' + #$A +
    '大家好,欢迎使用 Delphi Inspiration 的软件开发包和工具.' + #$A +
    '我们的主页是: http://www.yunqa.de/delphi/' + #$A +
    '祝大家开心!' + #$A +
    '</xml>' + #$A;

      pchar:=pansichar(xml);
      Doc := xmlReadMemory(pchar, Length(pchar), nil, nil, 0);
   XPathObj := xmlXPathEvalExpression(PAnsiChar(Utf8Encode('//xml/text()')),
XPathCtx);

          cur := XPathObj^.NodeSetVal^.NodeTab[i];
          WriteLn('= element node ', cur^.Name);
          writeln('node value:',ansistring(cur^.Content));

if cur^.Content have chinese char,the output is unreadable codes; This
problem in parsing html string  is also ;
(But when I use below code
 Doc := htmlParseFile(PAnsiChar(f8), 'UTF-8'),
writeln('node value:',ansistring(cur^.Content));
the output is right.)
Please help me,thank you!


here is the content of DIXml_Custom_Encodings.dpr;
rocedure write_title_value(Nodes: xmlNodeSetPtr);
var
  cur: xmlNodePtr;
  NS: xmlNsPtr;
  i, Size: Integer;

begin
  if Assigned(Nodes) then
    Size := Nodes^.nodeNr
  else
    Size := 0;

  WriteLn('Writing ', Size, ' nodes:');
  WriteLn;

  for i := 0 to Size - 1 do
  begin
          cur := Nodes^.NodeTab[i];
          WriteLn('= element node ', cur^.Name);
          writeln('node value:',(cur^.Content));
  end;
end;
procedure ErrorCallback(
  UserData: C_void_ptr;
  Error: xmlErrorPtr);
begin
  WriteLn(Error^.Message);
end;

//------------------------------------------------------------------------------

const
  { All example XML below uses #$A as line break character. This is the
default
    line break character according to the XML specification. Section 2.l1
    asks that XML processors must transform both the two-character sequence
    #$D #$A and any #$D that is not followed by #$A to a single #$A
character.

    As DIXml conforms to this requirement, the result XML will not compare
equal
    to the input XML if #$D #$A were used in the original input. Therefore
we
    use #$A here, even if it is not the common Windows line break sequence.
}

  {$DEFINE 1} // Default: 1.
  {$IFDEF 1}
  { 1: Some XML in GB2312 sent to me from Chinese DIXml user. Many thanks! }
  xml =
    '<?xml version="1.0" encoding="GB2312"?>' + #$A +
    '<xml>' + #$A +
    '大家好,欢迎使用 Delphi Inspiration 的软件开发包和工具.' + #$A +
    '我们的主页是: http://www.yunqa.de/delphi/' + #$A +
    '祝大家开心!' + #$A +
    '</xml>' + #$A;
  {$ELSE 1}
  {$IFDEF 2}
  { 2: Some XML in GB2312. It contains arbitrary characters without meaning.
}
  xml =
    '<?xml version="1.0" encoding="GB2312"?>' + #$A +
    '<xml>' + #$A +
    ' 啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘' + #$A +
    '鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱' + #$A +
    '袄傲奥懊澳芭捌扒叭吧笆八疤巴拔跋' + #$A +
    '靶把耙坝霸罢爸白柏百摆佰败拜稗斑' + #$A +
    '班搬扳般颁板版扮拌伴瓣半办绊邦帮' + #$A +
    '梆榜膀绑棒磅蚌镑傍谤苞胞包褒剥' + #$A +
    '</xml>' + #$A;
  {$ELSE 2}
  {$IFDEF 3}
  { 3: Some invalid XML in GB2312. Useful for decoder testing only. }
  xml =
    '<?xml version="1.0" encoding="GB2312"?>' + #$A +
    '<xml>' + #$A +
    '?a啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘' + #$A +
    '</xml>' + #$A;
  {$ELSE}
  %% You must define 1, 2, or 3. %%
    {$ENDIF 3}
  {$ENDIF 2}
  {$ENDIF 1}
var
  Doc: xmlDocPtr;
  Buf: xmlCharPtr;
  BufSize: Integer;
  pchar:pansichar;
  XPathCtx: xmlXPathContextPtr;
  XPathObj: xmlXPathObjectPtr;
begin
  try
    try
      xmlInitParser; // Initialize XML library.

      { Register the GB2312 encoding. }
      xmlNewCharEncodingHandler('GB2312', xmlGB2312ToUtf8, xmlUtf8ToGB2312);

      { Register an error handler to receive more detailed information
        in case something goes wrong. }
      xmlSetStructuredErrorFunc(nil, ErrorCallback);

      WriteLn(xml);
     //  xml:= EncodingToUtf8(xml, Encoding);
      pchar:=pansichar(xml);
      Doc := xmlReadMemory(pchar, Length(pchar), nil, nil, 0);
  { Create XPath evaluation context. }
  XPathCtx := xmlXPathNewContext(Doc);
  if not Assigned(XPathCtx) then
    begin
      WriteLn('Error: unable to create new XPath context');
      xmlFreeDoc(Doc);
      Exit;
    end;
  XPathObj := xmlXPathEvalExpression(PAnsiChar(Utf8Encode('//xml/text()')),
XPathCtx);
  write_title_value(XPathObj^.NodeSetVal);

   if Assigned(Doc) then
        begin
          xmlDocDumpFormatMemory(Doc, @Buf, @BufSize, 1);

          WriteLn;
          WriteLn(Buf);

          WriteLn;
          if StrComp(xml, Buf) = 0 then
            WriteLn('Input and Output are equal')
          else
            WriteLn('Input and Output are different');

          FreeMem(Buf);

          xmlFreeDoc(Doc);
        end
      else
        WriteLn('Error reading XML.');

      xmlCleanupParser; // Clear global XML library variables.
    except
      on e: Exception do
        WriteLn(e.Message);
    end;

  finally
    WriteLn;
    WriteLn('Done - press ENTER to exit.');
    ReadLn;
  end;
end.





 I use  rad2009 in chinese winxp.

thank you!

Other related posts: