On 23.07.2013 14:02, Andy Zubov wrote:
How can I read an XML file with win-1251 (Russian language) encoding?
Win-1251 encoding is not built into DIXml but is easily added as a custom encoding.
Please take a look at the demo project located in DIXml\Demos\DIXml_Custom_Encodings\DIXml_Custom_Encodings.dpr. You will see that it registers a new character encoding which is used by the XML.
I have added windows-1251 support to DIXmlConverters.pas (attached). With this, you can register the windows-1251 character encoding:
{ Register the windows-1251 encoding. }
xmlNewCharEncodingHandler(
  'windows-1251',
  xmlWin1251ToUtf8,
  xmlUtf8ToWin1251);

Afterwards, parse your document(s) as usual and windows-1251 support will be automatic.
Ralf
{-------------------------------------------------------------------------------

 Copyright (c) 1999-2013 Ralf Junker, The Delphi Inspiration
 Internet: http://www.yunqa.de/delphi/
 E-Mail:   delphi@xxxxxxxx

-------------------------------------------------------------------------------}

unit DIXmlConverters;

{$I DICompilers.inc}

interface

uses
  DIXml;

{ Character encoding callbacks for use with xmlNewCharEncodingHandler.

  All four functions share the same contract, implemented by the private
  xmlEncodingWrapper routine in the implementation section:

    Out_    Buffer receiving the converted bytes.
    OutLen  On entry: capacity of Out_ in bytes.
            On exit:  number of bytes written to Out_.
    In_     Buffer holding the bytes to convert.
    InLen   On entry: number of bytes available in In_.
            On exit:  number of bytes consumed from In_.

  Result: number of bytes written on success, -1 if Out_, OutLen or InLen
  is nil, -2 if the input contains an invalid sequence or a character that
  cannot be represented in the target encoding. }

{ Converts GB2312 (EUC-CN) encoded bytes to UTF-8. }
function xmlGB2312ToUtf8(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;

{ Converts UTF-8 encoded bytes to GB2312 (EUC-CN). }
function xmlUtf8ToGB2312(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;

{ Converts windows-1251 encoded bytes to UTF-8. }
function xmlWin1251ToUtf8(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;

{ Converts UTF-8 encoded bytes to windows-1251. }
function xmlUtf8ToWin1251(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;

implementation

uses
  DIConverters;

{ Generic transcoding loop shared by all public converters of this unit.

  Repeatedly decodes one character from In_ with mbtowc and encodes it to
  Out_ with wctomb until the input is exhausted, the output buffer is full,
  or one of the two converters reports a non-positive result.

  A character is only counted as consumed after it was written successfully,
  so InLen^ and OutLen^ always describe a consistent prefix of the data.

  Result:
    -1  Out_, OutLen or InLen is nil.
    -2  mbtowc returned RET_ILSEQ or wctomb returned RET_ILUNI.
     0  In_ is nil or InLen^ <= 0 (both lengths are set to 0), or nothing
        could be converted.
    >0  Number of bytes written to Out_.

  Any other non-positive converter result (presumably "input truncated" or
  "output too small" - confirm against DIConverters) stops the loop without
  raising an error, so the caller can resume with the unconsumed rest. }
function xmlEncodingWrapper(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr;
  mbtowc: xxx_mbtowc;
  wctomb: xxx_wctomb): C_int;
var
  convIn, convOut: conv_struct;
  pIn, pOut: PAnsiChar;
  lIn, lOut, rIn, rOut: Integer;
  u: ucs4_t;
begin
  if Assigned(Out_) and Assigned(OutLen) and Assigned(InLen) then
    begin
      Result := 0;
      if Assigned(In_) then
        begin
          lIn := InLen^;
          if lIn > 0 then
            begin
              pOut := Out_;
              lOut := OutLen^;
              pIn := In_;

              { Both converters start in their initial state. }
              convIn.ioState := 0;
              convOut.ioState := 0;

              repeat
                { Decode the next character from the input. }
                rIn := mbtowc(@convIn, u, pIn, lIn);
                if rIn > 0 then
                  begin
                    if lOut > 0 then
                      begin
                        { Encode the character to the output. }
                        rOut := wctomb(@convOut, pOut, u, lOut);
                        if rOut > 0 then
                          begin
                            Inc(pOut, rOut);
                            Dec(lOut, rOut);
                          end
                        else
                          begin
                            { Test the converter's return code, not the
                              remaining output space: lOut is always > 0
                              here, so checking it could never detect an
                              unencodable character. }
                            if rOut = RET_ILUNI then
                              Result := -2;
                            Break;
                          end;
                      end
                    else
                      { Output buffer full. }
                      Break;

                    { Advance the input only after a successful write. }
                    Inc(pIn, rIn);
                    Dec(lIn, rIn);
                  end
                else
                  begin
                    if rIn = RET_ILSEQ then
                      Result := -2;
                    Break;
                  end;
              until lIn = 0;

              { Report consumed and produced byte counts, also on error. }
              InLen^ := pIn - In_;
              OutLen^ := pOut - Out_;
              if Result = 0 then
                Result := OutLen^;
              Exit;
            end
        end;

      { No input: nothing consumed, nothing produced. Result is 0 here. }
      InLen^ := Result;
      OutLen^ := Result;
    end
  else
    Result := -1;
end;

function xmlGB2312ToUtf8(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;
begin
  Result := xmlEncodingWrapper(Out_, OutLen, In_, InLen, euc_cn_mbtowc, utf8_wctomb);
end;

function xmlUtf8ToGB2312(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;
begin
  Result := xmlEncodingWrapper(Out_, OutLen, In_, InLen, utf8_mbtowc, euc_cn_wctomb);
end;

function xmlWin1251ToUtf8(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;
begin
  Result := xmlEncodingWrapper(Out_, OutLen, In_, InLen, cp1251_mbtowc, utf8_wctomb);
end;

function xmlUtf8ToWin1251(
  Out_: C_char_ptr;
  OutLen: C_int_ptr;
  In_: C_char_ptr;
  InLen: C_int_ptr): C_int;
begin
  Result := xmlEncodingWrapper(Out_, OutLen, In_, InLen, utf8_mbtowc, cp1251_wctomb);
end;

end.