summary refs log tree commit diff stats
path: root/vendor/golang.org/x/text/encoding/japanese
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding/japanese')
-rw-r--r-- vendor/golang.org/x/text/encoding/japanese/eucjp.go 72
-rw-r--r-- vendor/golang.org/x/text/encoding/japanese/iso2022jp.go 59
-rw-r--r-- vendor/golang.org/x/text/encoding/japanese/shiftjis.go 34
3 files changed, 91 insertions, 74 deletions
diff --git a/vendor/golang.org/x/text/encoding/japanese/eucjp.go b/vendor/golang.org/x/text/encoding/japanese/eucjp.go
index 40f9b05f..79313fa5 100644
--- a/vendor/golang.org/x/text/encoding/japanese/eucjp.go
+++ b/vendor/golang.org/x/text/encoding/japanese/eucjp.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -23,10 +22,9 @@ var eucJP = internal.Encoding{
identifier.EUCPkdFmtJapanese,
}
-var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
-
type eucJPDecoder struct{ transform.NopResetter }
+// See https://encoding.spec.whatwg.org/#euc-jp-decoder.
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
@@ -37,60 +35,79 @@ loop:
case c0 == 0x8e:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ break
}
c1 := src[nSrc+1]
- if c1 < 0xa1 || 0xdf < c1 {
- err = errInvalidEUCJP
- break loop
+ switch {
+ case c1 < 0xa1:
+ r, size = utf8.RuneError, 1
+ case c1 > 0xdf:
+ r, size = utf8.RuneError, 2
+ if c1 == 0xff {
+ size = 1
+ }
+ default:
+ r, size = rune(c1)+(0xff61-0xa1), 2
}
- r, size = rune(c1)+(0xff61-0xa1), 2
-
case c0 == 0x8f:
if nSrc+2 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ if p := nSrc + 1; p < len(src) && 0xa1 <= src[p] && src[p] < 0xfe {
+ size = 2
+ }
+ break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
+ break
}
c2 := src[nSrc+2]
if c2 < 0xa1 || 0xfe < c2 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 2
+ break
}
- r, size = '\ufffd', 3
+ r, size = utf8.RuneError, 3
if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
r = rune(jis0212Decode[i])
if r == 0 {
- r = '\ufffd'
+ r = utf8.RuneError
}
}
case 0xa1 <= c0 && c0 <= 0xfe:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
+ break
}
- r, size = '\ufffd', 2
+ r, size = utf8.RuneError, 2
if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
r = rune(jis0208Decode[i])
if r == 0 {
- r = '\ufffd'
+ r = utf8.RuneError
}
}
default:
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
}
if nDst+utf8.RuneLen(r) > len(dst) {
@@ -99,9 +116,6 @@ loop:
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidEUCJP
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go b/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
index b63e7d5d..613226df 100644
--- a/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
+++ b/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -31,8 +30,6 @@ func iso2022JPNewEncoder() transform.Transformer {
return new(iso2022JPEncoder)
}
-var errInvalidISO2022JP = errors.New("japanese: invalid ISO-2022-JP encoding")
-
const (
asciiState = iota
katakanaState
@@ -50,45 +47,51 @@ func (d *iso2022JPDecoder) Reset() {
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
-loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
if c0 == asciiEsc {
if nSrc+2 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ // TODO: is it correct to only skip 1??
+ r, size = '\ufffd', 1
+ goto write
}
size = 3
c1 := src[nSrc+1]
c2 := src[nSrc+2]
switch {
- case c1 == '$' && (c2 == '@' || c2 == 'B'):
+ case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
*d = jis0208State
continue
- case c1 == '$' && c2 == '(':
+ case c1 == '$' && c2 == '(': // 0x24 0x28
if nSrc+3 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ r, size = '\ufffd', 1
+ goto write
}
size = 4
- if src[nSrc]+3 == 'D' {
+ if src[nSrc+3] == 'D' {
*d = jis0212State
continue
}
- case c1 == '(' && (c2 == 'B' || c2 == 'J'):
+ case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
*d = asciiState
continue
- case c1 == '(' && c2 == 'I':
+ case c1 == '(' && c2 == 'I': // 0x28 0x49
*d = katakanaState
continue
}
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
switch *d {
@@ -97,8 +100,8 @@ loop:
case katakanaState:
if c0 < 0x21 || 0x60 <= c0 {
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
r, size = rune(c0)+(0xff61-0x21), 1
@@ -106,11 +109,14 @@ loop:
if c0 == 0x0a {
*d = asciiState
r, size = rune(c0), 1
- break
+ goto write
}
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ r, size = '\ufffd', 1
+ goto write
}
size = 2
c1 := src[nSrc+1]
@@ -121,22 +127,19 @@ loop:
r = rune(jis0212Decode[i])
} else {
r = '\ufffd'
- break
+ goto write
}
if r == 0 {
r = '\ufffd'
}
}
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
- err = transform.ErrShortDst
- break loop
+ return nDst, nSrc, transform.ErrShortDst
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidISO2022JP
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/japanese/shiftjis.go b/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
index 099aecc3..16fd8a6e 100644
--- a/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
+++ b/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -24,8 +23,6 @@ var shiftJIS = internal.Encoding{
identifier.ShiftJIS,
}
-var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
-
type shiftJISDecoder struct{ transform.NopResetter }
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -48,28 +45,32 @@ loop:
c0 = 2*c0 - 0x21
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = '\ufffd', 1
+ goto write
}
c1 := src[nSrc+1]
switch {
case c1 < 0x40:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x7f:
c0--
c1 -= 0x40
case c1 == 0x7f:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x9f:
c0--
c1 -= 0x41
case c1 < 0xfd:
c1 -= 0x9f
default:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 2
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
@@ -79,20 +80,19 @@ loop:
}
}
+ case c0 == 0x80:
+ r, size = 0x80, 1
+
default:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1
}
-
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidShiftJIS
- }
return nDst, nSrc, err
}