FAQ
Reviewers: golang-dev_googlegroups.com,

Message:
Hello golang-dev@googlegroups.com (cc: golang-dev@googlegroups.com),

I'd like you to review this change to
https://code.google.com/p/go


Description:
encoding/xml: Marshal/Escape allows invalid characters

Fixes issue 4235.

Please review this at https://codereview.appspot.com/7438051/

Affected files:
M src/pkg/encoding/xml/marshal_test.go
M src/pkg/encoding/xml/xml.go
M src/pkg/encoding/xml/xml_test.go


Index: src/pkg/encoding/xml/marshal_test.go
===================================================================
--- a/src/pkg/encoding/xml/marshal_test.go
+++ b/src/pkg/encoding/xml/marshal_test.go
@@ -839,6 +839,10 @@
Value: &Domain{Comment: []byte("f--bar")},
Err: `xml: comments must not contain "--"`,
},
+ {
+ Value: &Book{Title: string("\x00")},
+ Err: "xml: invalid character code U+0000",
+ },
}

var marshalIndentTests = []struct {
Index: src/pkg/encoding/xml/xml.go
===================================================================
--- a/src/pkg/encoding/xml/xml.go
+++ b/src/pkg/encoding/xml/xml.go
@@ -1720,9 +1720,45 @@
esc_cr = []byte("&#xD;")
)

-// EscapeText writes to w the properly escaped XML equivalent
-// of the plain text data s.
-func EscapeText(w io.Writer, s []byte) error {
+// Checks whether a Unicode code point is NOT within XML 1.0 charset.
+// see: http://www.w3.org/TR/2008/REC-xml-20081126/
+func isInvalidChar(r rune) bool {
+ switch {
+ case (r == '\u0009') || (r == '\u000A') || (r == '\u000D'):
+ case (r >= '\u0020') && (r <= '\uD7FF'):
+ case (r >= '\uE000') && (r <= '\uFFFD'):
+ case (r >= '\U00010000') && (r <= '\U0010FFFF'):
+ default:
+ return true
+ }
+ return false
+}
+
+// Returns a byte slice in which all non XML 1.0 compliant runes
+// are replaced by the Unicode replacement character.
+func fixCharset(s []byte) []byte {
+ fix := func(r rune) rune {
+ if isInvalidChar(r) {
+ // Unicode replacement character
+ return '\uFFFD'
+ }
+ return r
+ }
+ return bytes.Map(fix, s)
+}
+
+// Writes to w the properly escaped xml equivalent of the plain text data s.
+// If strictCharset is false then all non XML 1.0 compliant characters are
+// converted to the Unicode replacement character, otherwise an error is
+// returned.
+func escape(w io.Writer, s []byte, strictCharset bool) error {
+ if i := bytes.IndexFunc(s, isInvalidChar); i != -1 {
+ if strictCharset {
+ return fmt.Errorf("xml: invalid character code %U", bytes.Runes(s)[i])
+ }
+ s = fixCharset(s)
+ }
+
var esc []byte
last := 0
for i, c := range s {
@@ -1760,11 +1796,17 @@
return nil
}

+// EscapeText writes to w the properly escaped XML equivalent
+// of the plain text data s.
+func EscapeText(w io.Writer, s []byte) error {
+ return escape(w, s, true)
+}
+
// Escape is like EscapeText but omits the error return value.
// It is provided for backwards compatibility with Go 1.0.
// Code targeting Go 1.1 or later should use EscapeText.
func Escape(w io.Writer, s []byte) {
- EscapeText(w, s)
+ escape(w, s, false)
}

// procInstEncoding parses the `encoding="..."` or `encoding='...'`
Index: src/pkg/encoding/xml/xml_test.go
===================================================================
--- a/src/pkg/encoding/xml/xml_test.go
+++ b/src/pkg/encoding/xml/xml_test.go
@@ -5,6 +5,7 @@
package xml

import (
+ "bytes"
"fmt"
"io"
"reflect"
@@ -700,6 +701,29 @@
err := EscapeText(errWriter{}, []byte{'A'})

if err == nil || err.Error() != expectErr {
- t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr)
+ t.Errorf("[error] %v, want %v", err, expectErr)
}
}
+
+func TestEscapeTextInvalidChar(t *testing.T) {
+ input := []byte("A \x00 terminated string.")
+ expectErr := "xml: invalid character code U+0000"
+
+ buff := new(bytes.Buffer)
+ if err := EscapeText(buff, input); err.Error() != expectErr {
+ t.Errorf("[error] %v, want %v", err, expectErr)
+ }
+}
+
+func TestEscapeInvalidChar(t *testing.T) {
+ input := []byte("A \x00 terminated string.")
+ expected := "A \uFFFD terminated string."
+
+ buff := new(bytes.Buffer)
+ Escape(buff, input)
+ text := buff.String()
+
+ if text != expected {
+ t.Errorf("got %v, want %v", text, expected)
+ }
+}


--

---
You received this message because you are subscribed to the Google Groups "golang-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to golang-dev+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: golang-dev
categories: go
posted: Mar 1, '13 at 1:56a
active: Mar 1, '13 at 1:56a
posts: 1
users: 1
website: golang.org

1 user in discussion

Osaingre: 1 post

People

Translate

site design / logo © 2022 Grokbase