1 | // Copyright 2013 The Go Authors. All rights reserved.
|
---|
2 | // Use of this source code is governed by a BSD-style
|
---|
3 | // license that can be found in the LICENSE file.
|
---|
4 |
|
---|
5 | package language
|
---|
6 |
|
---|
7 | import (
|
---|
8 | "errors"
|
---|
9 | "sort"
|
---|
10 | "strconv"
|
---|
11 | "strings"
|
---|
12 |
|
---|
13 | "golang.org/x/text/internal/language"
|
---|
14 | )
|
---|
15 |
|
---|
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
|
---|
25 |
|
---|
26 | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
---|
27 | // failed it returns an error and any part of the tag that could be parsed.
|
---|
28 | // If parsing succeeded but an unknown value was found, it returns
|
---|
29 | // ValueError. The Tag returned in this case is just stripped of the unknown
|
---|
30 | // value. All other values are preserved. It accepts tags in the BCP 47 format
|
---|
31 | // and extensions to this standard defined in
|
---|
32 | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
---|
33 | // The resulting tag is canonicalized using the default canonicalization type.
|
---|
34 | func Parse(s string) (t Tag, err error) {
|
---|
35 | return Default.Parse(s)
|
---|
36 | }
|
---|
37 |
|
---|
38 | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
---|
39 | // failed it returns an error and any part of the tag that could be parsed.
|
---|
40 | // If parsing succeeded but an unknown value was found, it returns
|
---|
41 | // ValueError. The Tag returned in this case is just stripped of the unknown
|
---|
42 | // value. All other values are preserved. It accepts tags in the BCP 47 format
|
---|
43 | // and extensions to this standard defined in
|
---|
44 | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
---|
45 | // The resulting tag is canonicalized using the canonicalization type c.
|
---|
46 | func (c CanonType) Parse(s string) (t Tag, err error) {
|
---|
47 | defer func() {
|
---|
48 | if recover() != nil {
|
---|
49 | t = Tag{}
|
---|
50 | err = language.ErrSyntax
|
---|
51 | }
|
---|
52 | }()
|
---|
53 |
|
---|
54 | tt, err := language.Parse(s)
|
---|
55 | if err != nil {
|
---|
56 | return makeTag(tt), err
|
---|
57 | }
|
---|
58 | tt, changed := canonicalize(c, tt)
|
---|
59 | if changed {
|
---|
60 | tt.RemakeString()
|
---|
61 | }
|
---|
62 | return makeTag(tt), err
|
---|
63 | }
|
---|
64 |
|
---|
65 | // Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
---|
66 | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
---|
67 | // Base, Script or Region or slice of type Variant or Extension is passed more
|
---|
68 | // than once, the latter will overwrite the former. Variants and Extensions are
|
---|
69 | // accumulated, but if two extensions of the same type are passed, the latter
|
---|
70 | // will replace the former. For -u extensions, though, the key-type pairs are
|
---|
71 | // added, where later values overwrite older ones. A Tag overwrites all former
|
---|
72 | // values and typically only makes sense as the first argument. The resulting
|
---|
73 | // tag is returned after canonicalizing using the Default CanonType. If one or
|
---|
74 | // more errors are encountered, one of the errors is returned.
|
---|
75 | func Compose(part ...interface{}) (t Tag, err error) {
|
---|
76 | return Default.Compose(part...)
|
---|
77 | }
|
---|
78 |
|
---|
79 | // Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
---|
80 | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
---|
81 | // Base, Script or Region or slice of type Variant or Extension is passed more
|
---|
82 | // than once, the latter will overwrite the former. Variants and Extensions are
|
---|
83 | // accumulated, but if two extensions of the same type are passed, the latter
|
---|
84 | // will replace the former. For -u extensions, though, the key-type pairs are
|
---|
85 | // added, where later values overwrite older ones. A Tag overwrites all former
|
---|
86 | // values and typically only makes sense as the first argument. The resulting
|
---|
87 | // tag is returned after canonicalizing using CanonType c. If one or more errors
|
---|
88 | // are encountered, one of the errors is returned.
|
---|
89 | func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
---|
90 | defer func() {
|
---|
91 | if recover() != nil {
|
---|
92 | t = Tag{}
|
---|
93 | err = language.ErrSyntax
|
---|
94 | }
|
---|
95 | }()
|
---|
96 |
|
---|
97 | var b language.Builder
|
---|
98 | if err = update(&b, part...); err != nil {
|
---|
99 | return und, err
|
---|
100 | }
|
---|
101 | b.Tag, _ = canonicalize(c, b.Tag)
|
---|
102 | return makeTag(b.Make()), err
|
---|
103 | }
|
---|
104 |
|
---|
// errInvalidArgument is reported by update when it is handed a Variant or
// Extension whose underlying string is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
---|
106 |
|
---|
107 | func update(b *language.Builder, part ...interface{}) (err error) {
|
---|
108 | for _, x := range part {
|
---|
109 | switch v := x.(type) {
|
---|
110 | case Tag:
|
---|
111 | b.SetTag(v.tag())
|
---|
112 | case Base:
|
---|
113 | b.Tag.LangID = v.langID
|
---|
114 | case Script:
|
---|
115 | b.Tag.ScriptID = v.scriptID
|
---|
116 | case Region:
|
---|
117 | b.Tag.RegionID = v.regionID
|
---|
118 | case Variant:
|
---|
119 | if v.variant == "" {
|
---|
120 | err = errInvalidArgument
|
---|
121 | break
|
---|
122 | }
|
---|
123 | b.AddVariant(v.variant)
|
---|
124 | case Extension:
|
---|
125 | if v.s == "" {
|
---|
126 | err = errInvalidArgument
|
---|
127 | break
|
---|
128 | }
|
---|
129 | b.SetExt(v.s)
|
---|
130 | case []Variant:
|
---|
131 | b.ClearVariants()
|
---|
132 | for _, v := range v {
|
---|
133 | b.AddVariant(v.variant)
|
---|
134 | }
|
---|
135 | case []Extension:
|
---|
136 | b.ClearExtensions()
|
---|
137 | for _, e := range v {
|
---|
138 | b.SetExt(e.s)
|
---|
139 | }
|
---|
140 | // TODO: support parsing of raw strings based on morphology or just extensions?
|
---|
141 | case error:
|
---|
142 | if v != nil {
|
---|
143 | err = v
|
---|
144 | }
|
---|
145 | }
|
---|
146 | }
|
---|
147 | return
|
---|
148 | }
|
---|
149 |
|
---|
// Errors reported by ParseAcceptLanguage.
var (
	// errInvalidWeight is returned when the text following ';' in an entry
	// is not a parsable "q=<number>" quality weight.
	errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")

	// errTagListTooLarge is returned when the input contains more than 1000
	// '-' characters.
	errTagListTooLarge = errors.New("tag list exceeds max length")
)
|
---|
152 |
|
---|
153 | // ParseAcceptLanguage parses the contents of an Accept-Language header as
|
---|
154 | // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
---|
155 | // a list of corresponding quality weights. It is more permissive than RFC 2616
|
---|
156 | // and may return non-nil slices even if the input is not valid.
|
---|
157 | // The Tags will be sorted by highest weight first and then by first occurrence.
|
---|
158 | // Tags with a weight of zero will be dropped. An error will be returned if the
|
---|
159 | // input could not be parsed.
|
---|
160 | func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
---|
161 | defer func() {
|
---|
162 | if recover() != nil {
|
---|
163 | tag = nil
|
---|
164 | q = nil
|
---|
165 | err = language.ErrSyntax
|
---|
166 | }
|
---|
167 | }()
|
---|
168 |
|
---|
169 | if strings.Count(s, "-") > 1000 {
|
---|
170 | return nil, nil, errTagListTooLarge
|
---|
171 | }
|
---|
172 |
|
---|
173 | var entry string
|
---|
174 | for s != "" {
|
---|
175 | if entry, s = split(s, ','); entry == "" {
|
---|
176 | continue
|
---|
177 | }
|
---|
178 |
|
---|
179 | entry, weight := split(entry, ';')
|
---|
180 |
|
---|
181 | // Scan the language.
|
---|
182 | t, err := Parse(entry)
|
---|
183 | if err != nil {
|
---|
184 | id, ok := acceptFallback[entry]
|
---|
185 | if !ok {
|
---|
186 | return nil, nil, err
|
---|
187 | }
|
---|
188 | t = makeTag(language.Tag{LangID: id})
|
---|
189 | }
|
---|
190 |
|
---|
191 | // Scan the optional weight.
|
---|
192 | w := 1.0
|
---|
193 | if weight != "" {
|
---|
194 | weight = consume(weight, 'q')
|
---|
195 | weight = consume(weight, '=')
|
---|
196 | // consume returns the empty string when a token could not be
|
---|
197 | // consumed, resulting in an error for ParseFloat.
|
---|
198 | if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
---|
199 | return nil, nil, errInvalidWeight
|
---|
200 | }
|
---|
201 | // Drop tags with a quality weight of 0.
|
---|
202 | if w <= 0 {
|
---|
203 | continue
|
---|
204 | }
|
---|
205 | }
|
---|
206 |
|
---|
207 | tag = append(tag, t)
|
---|
208 | q = append(q, float32(w))
|
---|
209 | }
|
---|
210 | sort.Stable(&tagSort{tag, q})
|
---|
211 | return tag, q, nil
|
---|
212 | }
|
---|
213 |
|
---|
// consume removes a leading token c from s and returns the remainder with
// surrounding white space trimmed. It returns the empty string if s does not
// start with c (which includes the case where s is empty).
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
|
---|
222 |
|
---|
// split cuts s around the first occurrence of the byte c and returns the text
// before it (head) and after it (tail), each with surrounding white space
// trimmed. If c does not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
}
|
---|
229 |
|
---|
230 | // Add hack mapping to deal with a small number of cases that occur
|
---|
231 | // in Accept-Language (with reasonable frequency).
|
---|
232 | var acceptFallback = map[string]language.Language{
|
---|
233 | "english": _en,
|
---|
234 | "deutsch": _de,
|
---|
235 | "italian": _it,
|
---|
236 | "french": _fr,
|
---|
237 | "*": _mul, // defined in the spec to match all languages.
|
---|
238 | }
|
---|
239 |
|
---|
240 | type tagSort struct {
|
---|
241 | tag []Tag
|
---|
242 | q []float32
|
---|
243 | }
|
---|
244 |
|
---|
245 | func (s *tagSort) Len() int {
|
---|
246 | return len(s.q)
|
---|
247 | }
|
---|
248 |
|
---|
249 | func (s *tagSort) Less(i, j int) bool {
|
---|
250 | return s.q[i] > s.q[j]
|
---|
251 | }
|
---|
252 |
|
---|
253 | func (s *tagSort) Swap(i, j int) {
|
---|
254 | s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
---|
255 | s.q[i], s.q[j] = s.q[j], s.q[i]
|
---|
256 | }
|
---|