Unicode and XML

In XML it is not wise to include Unicode characters literally. Besides the 5 standard replacements:

'&amp;' => '&'
'&gt;' => '>'
'&lt;' => '<'
'&quot;' => '"'
'&apos;' => '''

every Unicode character has a numeric replacement (character reference) in XML that is widely supported. So that you don't run into serious encoding problems, I have included an old piece of VB6 code with an incomplete list (most-used codes only):

' Converts the XML hexadecimal character references known to this routine
' (for example "&#x00E9;") back into the literal characters they stand for.
'
' strTxt  - text that may contain "&#xNNNN;" references.
' Returns - a copy of strTxt with every listed reference replaced by its
'           character. References that are not in the list are left untouched.
'
' Notes:
' - Only the exact spellings below are recognised (four hex digits, upper-case
'   A-F, as written in the list).
' - "ø" is U+00F8 and "^" is U+005E. Earlier versions of this code used
'   "&#x2298;" and "&#x2218;" for them by mistake; those two legacy spellings
'   are still decoded at the end so text produced by the old encoder keeps
'   round-tripping.
Private Function ReplaceCodesByUnicode(strTxt As String) As String
    Dim strOut As String

    ' Latin letters with diacritics, ligatures and special letters
    strOut = Replace(strTxt, "&#x00E1;", "á")
    strOut = Replace(strOut, "&#x00C1;", "Á")
    strOut = Replace(strOut, "&#x00E2;", "â")
    strOut = Replace(strOut, "&#x00C2;", "Â")
    strOut = Replace(strOut, "&#x00E0;", "à")
    strOut = Replace(strOut, "&#x00C0;", "À")
    strOut = Replace(strOut, "&#x00E5;", "å")
    strOut = Replace(strOut, "&#x00C5;", "Å")
    strOut = Replace(strOut, "&#x00E3;", "ã")
    strOut = Replace(strOut, "&#x00C3;", "Ã")
    strOut = Replace(strOut, "&#x00E4;", "ä")
    strOut = Replace(strOut, "&#x00C4;", "Ä")
    strOut = Replace(strOut, "&#x00E6;", "æ")
    strOut = Replace(strOut, "&#x00C6;", "Æ")
    strOut = Replace(strOut, "&#x00E7;", "ç")
    strOut = Replace(strOut, "&#x00C7;", "Ç")
    strOut = Replace(strOut, "&#x00D0;", "Ð")
    strOut = Replace(strOut, "&#x00F0;", "ð")
    strOut = Replace(strOut, "&#x00E9;", "é")
    strOut = Replace(strOut, "&#x00C9;", "É")
    strOut = Replace(strOut, "&#x00EA;", "ê")
    strOut = Replace(strOut, "&#x00CA;", "Ê")
    strOut = Replace(strOut, "&#x00E8;", "è")
    strOut = Replace(strOut, "&#x00C8;", "È")
    strOut = Replace(strOut, "&#x00EB;", "ë")
    strOut = Replace(strOut, "&#x00CB;", "Ë")
    strOut = Replace(strOut, "&#x00ED;", "í")
    strOut = Replace(strOut, "&#x00CD;", "Í")
    strOut = Replace(strOut, "&#x00EE;", "î")
    strOut = Replace(strOut, "&#x00CE;", "Î")
    strOut = Replace(strOut, "&#x00EC;", "ì")
    strOut = Replace(strOut, "&#x00CC;", "Ì")
    strOut = Replace(strOut, "&#x00EF;", "ï")
    strOut = Replace(strOut, "&#x00CF;", "Ï")
    strOut = Replace(strOut, "&#x00F1;", "ñ")
    strOut = Replace(strOut, "&#x00D1;", "Ñ")
    strOut = Replace(strOut, "&#x00F3;", "ó")
    strOut = Replace(strOut, "&#x00D3;", "Ó")
    strOut = Replace(strOut, "&#x00F4;", "ô")
    strOut = Replace(strOut, "&#x00D4;", "Ô")
    strOut = Replace(strOut, "&#x00F2;", "ò")
    strOut = Replace(strOut, "&#x00D2;", "Ò")
    ' U+00F8 is the real code point of "ø"
    strOut = Replace(strOut, "&#x00F8;", "ø")
    strOut = Replace(strOut, "&#x00D8;", "Ø")
    strOut = Replace(strOut, "&#x00F5;", "õ")
    strOut = Replace(strOut, "&#x00D5;", "Õ")
    strOut = Replace(strOut, "&#x00F6;", "ö")
    strOut = Replace(strOut, "&#x00D6;", "Ö")
    strOut = Replace(strOut, "&#x00DF;", "ß")
    strOut = Replace(strOut, "&#x00FE;", "þ")
    strOut = Replace(strOut, "&#x00DE;", "Þ")
    strOut = Replace(strOut, "&#x00FA;", "ú")
    strOut = Replace(strOut, "&#x00DA;", "Ú")
    strOut = Replace(strOut, "&#x00DB;", "Û")
    strOut = Replace(strOut, "&#x00FB;", "û")
    strOut = Replace(strOut, "&#x00F9;", "ù")
    strOut = Replace(strOut, "&#x00D9;", "Ù")
    strOut = Replace(strOut, "&#x00FC;", "ü")
    strOut = Replace(strOut, "&#x00DC;", "Ü")
    strOut = Replace(strOut, "&#x00FD;", "ý")
    strOut = Replace(strOut, "&#x00DD;", "Ý")
    strOut = Replace(strOut, "&#x00FF;", "ÿ")

    ' Symbols and punctuation
    strOut = Replace(strOut, "&#x20AC;", "€")
    strOut = Replace(strOut, "&#x00B4;", "´")
    strOut = Replace(strOut, "&#x0060;", "`")
    strOut = Replace(strOut, "&#x007E;", "~")
    strOut = Replace(strOut, "&#x00A8;", "¨")
    strOut = Replace(strOut, "&#x00A9;", "©")
    ' U+005E is the real code point of "^"
    strOut = Replace(strOut, "&#x005E;", "^")
    strOut = Replace(strOut, "&#x00A7;", "§")
    strOut = Replace(strOut, "&#x00B5;", "µ")
    strOut = Replace(strOut, "&#x00A3;", "£")
    strOut = Replace(strOut, "&#x0024;", "$")
    strOut = Replace(strOut, "&#x00B0;", "°")
    strOut = Replace(strOut, "&#x0040;", "@")
    strOut = Replace(strOut, "&#x00B2;", "²")
    strOut = Replace(strOut, "&#x00B3;", "³")
    strOut = Replace(strOut, "&#x00AB;", "«")
    strOut = Replace(strOut, "&#x00BB;", "»")

    ' Legacy spellings written by earlier versions of the encoder. Strictly,
    ' U+2298 is the circled division slash and U+2218 is the ring operator,
    ' but they are kept here so previously stored text still decodes as before.
    strOut = Replace(strOut, "&#x2298;", "ø")
    strOut = Replace(strOut, "&#x2218;", "^")

    ReplaceCodesByUnicode = strOut
End Function
' Replaces the characters known to this routine with their XML hexadecimal
' character references (for example "é" becomes "&#x00E9;").
'
' strTxt  - the text to encode.
' Returns - a copy of strTxt in which every listed character is replaced by
'           its "&#xNNNN;" reference; all other characters are copied as-is.
'
' Notes:
' - The text is walked one character at a time. Output is collected in a
'   small buffer that is appended to the result every 4096 input characters,
'   so the long result string is not re-concatenated for every character.
' - "ø" is encoded as U+00F8 and "^" as U+005E. Earlier versions emitted
'   "&#x2298;" (circled division slash) and "&#x2218;" (ring operator), which
'   XML parsers render as different characters.
Private Function ReplaceUnicodeByCodes(strTxt As String) As String
    Dim t As Long
    Dim strOut As String
    Dim strBuf As String
    Dim strChar As String

    strOut = ""
    strBuf = ""
    For t = 1 To Len(strTxt)
        strChar = Mid$(strTxt, t, 1)
        Select Case strChar
            ' Latin letters with diacritics, ligatures and special letters
            Case "á": strBuf = strBuf & "&#x00E1;"
            Case "Á": strBuf = strBuf & "&#x00C1;"
            Case "â": strBuf = strBuf & "&#x00E2;"
            Case "Â": strBuf = strBuf & "&#x00C2;"
            Case "à": strBuf = strBuf & "&#x00E0;"
            Case "À": strBuf = strBuf & "&#x00C0;"
            Case "å": strBuf = strBuf & "&#x00E5;"
            Case "Å": strBuf = strBuf & "&#x00C5;"
            Case "ã": strBuf = strBuf & "&#x00E3;"
            Case "Ã": strBuf = strBuf & "&#x00C3;"
            Case "ä": strBuf = strBuf & "&#x00E4;"
            Case "Ä": strBuf = strBuf & "&#x00C4;"
            Case "æ": strBuf = strBuf & "&#x00E6;"
            Case "Æ": strBuf = strBuf & "&#x00C6;"
            Case "ç": strBuf = strBuf & "&#x00E7;"
            Case "Ç": strBuf = strBuf & "&#x00C7;"
            Case "Ð": strBuf = strBuf & "&#x00D0;"
            Case "ð": strBuf = strBuf & "&#x00F0;"
            Case "é": strBuf = strBuf & "&#x00E9;"
            Case "É": strBuf = strBuf & "&#x00C9;"
            Case "ê": strBuf = strBuf & "&#x00EA;"
            Case "Ê": strBuf = strBuf & "&#x00CA;"
            Case "è": strBuf = strBuf & "&#x00E8;"
            Case "È": strBuf = strBuf & "&#x00C8;"
            Case "ë": strBuf = strBuf & "&#x00EB;"
            Case "Ë": strBuf = strBuf & "&#x00CB;"
            Case "í": strBuf = strBuf & "&#x00ED;"
            Case "Í": strBuf = strBuf & "&#x00CD;"
            Case "î": strBuf = strBuf & "&#x00EE;"
            Case "Î": strBuf = strBuf & "&#x00CE;"
            Case "ì": strBuf = strBuf & "&#x00EC;"
            Case "Ì": strBuf = strBuf & "&#x00CC;"
            Case "ï": strBuf = strBuf & "&#x00EF;"
            Case "Ï": strBuf = strBuf & "&#x00CF;"
            Case "ñ": strBuf = strBuf & "&#x00F1;"
            Case "Ñ": strBuf = strBuf & "&#x00D1;"
            Case "ó": strBuf = strBuf & "&#x00F3;"
            Case "Ó": strBuf = strBuf & "&#x00D3;"
            Case "ô": strBuf = strBuf & "&#x00F4;"
            Case "Ô": strBuf = strBuf & "&#x00D4;"
            Case "ò": strBuf = strBuf & "&#x00F2;"
            Case "Ò": strBuf = strBuf & "&#x00D2;"
            ' U+00F8 is the real code point of "ø" (was wrongly U+2298)
            Case "ø": strBuf = strBuf & "&#x00F8;"
            Case "Ø": strBuf = strBuf & "&#x00D8;"
            Case "õ": strBuf = strBuf & "&#x00F5;"
            Case "Õ": strBuf = strBuf & "&#x00D5;"
            Case "ö": strBuf = strBuf & "&#x00F6;"
            Case "Ö": strBuf = strBuf & "&#x00D6;"
            Case "ß": strBuf = strBuf & "&#x00DF;"
            Case "þ": strBuf = strBuf & "&#x00FE;"
            Case "Þ": strBuf = strBuf & "&#x00DE;"
            Case "ú": strBuf = strBuf & "&#x00FA;"
            Case "Ú": strBuf = strBuf & "&#x00DA;"
            Case "Û": strBuf = strBuf & "&#x00DB;"
            Case "û": strBuf = strBuf & "&#x00FB;"
            Case "ù": strBuf = strBuf & "&#x00F9;"
            Case "Ù": strBuf = strBuf & "&#x00D9;"
            Case "ü": strBuf = strBuf & "&#x00FC;"
            Case "Ü": strBuf = strBuf & "&#x00DC;"
            Case "ý": strBuf = strBuf & "&#x00FD;"
            Case "Ý": strBuf = strBuf & "&#x00DD;"
            Case "ÿ": strBuf = strBuf & "&#x00FF;"

            ' Symbols and punctuation
            Case "€": strBuf = strBuf & "&#x20AC;"
            Case "´": strBuf = strBuf & "&#x00B4;"
            ' NOTE(review): the typographic apostrophe is written as the acute
            ' accent U+00B4 (its own code point is U+2019), so it does not
            ' round-trip. Kept as in the original; confirm whether this
            ' normalisation is intended.
            Case "’": strBuf = strBuf & "&#x00B4;"
            Case "`": strBuf = strBuf & "&#x0060;"
            Case "~": strBuf = strBuf & "&#x007E;"
            Case "¨": strBuf = strBuf & "&#x00A8;"
            ' U+005E is the real code point of "^" (was wrongly U+2218)
            Case "^": strBuf = strBuf & "&#x005E;"
            Case "§": strBuf = strBuf & "&#x00A7;"
            Case "µ": strBuf = strBuf & "&#x00B5;"
            Case "£": strBuf = strBuf & "&#x00A3;"
            Case "$": strBuf = strBuf & "&#x0024;"
            Case "©": strBuf = strBuf & "&#x00A9;"
            Case "°": strBuf = strBuf & "&#x00B0;"
            Case "@": strBuf = strBuf & "&#x0040;"
            Case "²": strBuf = strBuf & "&#x00B2;"
            Case "³": strBuf = strBuf & "&#x00B3;"
            Case "«": strBuf = strBuf & "&#x00AB;"
            Case "»": strBuf = strBuf & "&#x00BB;"

            ' Anything not listed is copied through unchanged
            Case Else
                strBuf = strBuf & strChar
        End Select

        ' Flush the small buffer into the result every 4096 input characters
        If (t Mod 4096) = 0 Then
            strOut = strOut & strBuf
            strBuf = ""
        End If
    Next t

    ' Append whatever is left in the buffer after the last full chunk
    strOut = strOut & strBuf

    ReplaceUnicodeByCodes = strOut
End Function

More characters can be found at http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
For xml the code is in the third column so Yen (¥) => '&#x00A5;'

Hopefully it helps you as much as it helped me
BTW v1.2 Beta 12 is for me release quality

Greetings
Gino Deblauwe
Chief programming
UseIt Group NV

FTC Website: 
http://www.truenorthsoftware.com/FormattedTextControl/FormattedTextControl.html
Set List Options (digest and vacation modes): www.freelists.org/list/ftcdev
List Archive: www.freelists.org/archives/ftcdev
Unsubscribe: Send email to ftcdev-request@xxxxxxxxxxxxx with "unsubscribe" in 
the subject field.



Other related posts: