source: code/trunk/vendor/golang.org/x/text/language/parse.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored dependencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 7.5 KB
Line 
1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package language
6
7import (
8 "errors"
9 "sort"
10 "strconv"
11 "strings"
12
13 "golang.org/x/text/internal/language"
14)
15
// A ValueError reports a subtag that is well-formed but whose value is not
// recognized as valid. The parsing functions return it when the input as a
// whole is syntactically correct yet contains such a subtag.
type ValueError interface {
	error

	// Subtag reports the subtag that caused the error.
	Subtag() string
}
25
26// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
27// failed it returns an error and any part of the tag that could be parsed.
28// If parsing succeeded but an unknown value was found, it returns
29// ValueError. The Tag returned in this case is just stripped of the unknown
30// value. All other values are preserved. It accepts tags in the BCP 47 format
31// and extensions to this standard defined in
32// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
33// The resulting tag is canonicalized using the default canonicalization type.
34func Parse(s string) (t Tag, err error) {
35 return Default.Parse(s)
36}
37
38// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
39// failed it returns an error and any part of the tag that could be parsed.
40// If parsing succeeded but an unknown value was found, it returns
41// ValueError. The Tag returned in this case is just stripped of the unknown
42// value. All other values are preserved. It accepts tags in the BCP 47 format
43// and extensions to this standard defined in
44// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
45// The resulting tag is canonicalized using the canonicalization type c.
46func (c CanonType) Parse(s string) (t Tag, err error) {
47 defer func() {
48 if recover() != nil {
49 t = Tag{}
50 err = language.ErrSyntax
51 }
52 }()
53
54 tt, err := language.Parse(s)
55 if err != nil {
56 return makeTag(tt), err
57 }
58 tt, changed := canonicalize(c, tt)
59 if changed {
60 tt.RemakeString()
61 }
62 return makeTag(tt), err
63}
64
65// Compose creates a Tag from individual parts, which may be of type Tag, Base,
66// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
67// Base, Script or Region or slice of type Variant or Extension is passed more
68// than once, the latter will overwrite the former. Variants and Extensions are
69// accumulated, but if two extensions of the same type are passed, the latter
70// will replace the former. For -u extensions, though, the key-type pairs are
71// added, where later values overwrite older ones. A Tag overwrites all former
72// values and typically only makes sense as the first argument. The resulting
73// tag is returned after canonicalizing using the Default CanonType. If one or
74// more errors are encountered, one of the errors is returned.
75func Compose(part ...interface{}) (t Tag, err error) {
76 return Default.Compose(part...)
77}
78
79// Compose creates a Tag from individual parts, which may be of type Tag, Base,
80// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
81// Base, Script or Region or slice of type Variant or Extension is passed more
82// than once, the latter will overwrite the former. Variants and Extensions are
83// accumulated, but if two extensions of the same type are passed, the latter
84// will replace the former. For -u extensions, though, the key-type pairs are
85// added, where later values overwrite older ones. A Tag overwrites all former
86// values and typically only makes sense as the first argument. The resulting
87// tag is returned after canonicalizing using CanonType c. If one or more errors
88// are encountered, one of the errors is returned.
89func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
90 defer func() {
91 if recover() != nil {
92 t = Tag{}
93 err = language.ErrSyntax
94 }
95 }()
96
97 var b language.Builder
98 if err = update(&b, part...); err != nil {
99 return und, err
100 }
101 b.Tag, _ = canonicalize(c, b.Tag)
102 return makeTag(b.Make()), err
103}
104
// errInvalidArgument is reported by update when it is handed a Variant or an
// Extension whose underlying string is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
106
107func update(b *language.Builder, part ...interface{}) (err error) {
108 for _, x := range part {
109 switch v := x.(type) {
110 case Tag:
111 b.SetTag(v.tag())
112 case Base:
113 b.Tag.LangID = v.langID
114 case Script:
115 b.Tag.ScriptID = v.scriptID
116 case Region:
117 b.Tag.RegionID = v.regionID
118 case Variant:
119 if v.variant == "" {
120 err = errInvalidArgument
121 break
122 }
123 b.AddVariant(v.variant)
124 case Extension:
125 if v.s == "" {
126 err = errInvalidArgument
127 break
128 }
129 b.SetExt(v.s)
130 case []Variant:
131 b.ClearVariants()
132 for _, v := range v {
133 b.AddVariant(v.variant)
134 }
135 case []Extension:
136 b.ClearExtensions()
137 for _, e := range v {
138 b.SetExt(e.s)
139 }
140 // TODO: support parsing of raw strings based on morphology or just extensions?
141 case error:
142 if v != nil {
143 err = v
144 }
145 }
146 }
147 return
148}
149
// Errors reported by ParseAcceptLanguage.
var (
	// errInvalidWeight is returned when the quality weight of an entry
	// cannot be parsed.
	errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")

	// errTagListTooLarge is returned when the input contains too many
	// hyphens to be processed.
	errTagListTooLarge = errors.New("tag list exceeds max length")
)
152
153// ParseAcceptLanguage parses the contents of an Accept-Language header as
154// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
155// a list of corresponding quality weights. It is more permissive than RFC 2616
156// and may return non-nil slices even if the input is not valid.
157// The Tags will be sorted by highest weight first and then by first occurrence.
158// Tags with a weight of zero will be dropped. An error will be returned if the
159// input could not be parsed.
160func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
161 defer func() {
162 if recover() != nil {
163 tag = nil
164 q = nil
165 err = language.ErrSyntax
166 }
167 }()
168
169 if strings.Count(s, "-") > 1000 {
170 return nil, nil, errTagListTooLarge
171 }
172
173 var entry string
174 for s != "" {
175 if entry, s = split(s, ','); entry == "" {
176 continue
177 }
178
179 entry, weight := split(entry, ';')
180
181 // Scan the language.
182 t, err := Parse(entry)
183 if err != nil {
184 id, ok := acceptFallback[entry]
185 if !ok {
186 return nil, nil, err
187 }
188 t = makeTag(language.Tag{LangID: id})
189 }
190
191 // Scan the optional weight.
192 w := 1.0
193 if weight != "" {
194 weight = consume(weight, 'q')
195 weight = consume(weight, '=')
196 // consume returns the empty string when a token could not be
197 // consumed, resulting in an error for ParseFloat.
198 if w, err = strconv.ParseFloat(weight, 32); err != nil {
199 return nil, nil, errInvalidWeight
200 }
201 // Drop tags with a quality weight of 0.
202 if w <= 0 {
203 continue
204 }
205 }
206
207 tag = append(tag, t)
208 q = append(q, float32(w))
209 }
210 sort.Stable(&tagSort{tag, q})
211 return tag, q, nil
212}
213
// consume strips the leading byte c from s and returns what follows, with
// surrounding white space trimmed. If s is empty or does not begin with c,
// it returns the empty string.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		rest := s[1:]
		return strings.TrimSpace(rest)
	}
	return ""
}
222
// split cuts s at the first occurrence of the byte c. head is the text before
// c and tail the text after it, each with surrounding white space trimmed.
// If c does not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
229
230// Add hack mapping to deal with a small number of cases that occur
231// in Accept-Language (with reasonable frequency).
232var acceptFallback = map[string]language.Language{
233 "english": _en,
234 "deutsch": _de,
235 "italian": _it,
236 "french": _fr,
237 "*": _mul, // defined in the spec to match all languages.
238}
239
240type tagSort struct {
241 tag []Tag
242 q []float32
243}
244
245func (s *tagSort) Len() int {
246 return len(s.q)
247}
248
249func (s *tagSort) Less(i, j int) bool {
250 return s.q[i] > s.q[j]
251}
252
253func (s *tagSort) Swap(i, j int) {
254 s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
255 s.q[i], s.q[j] = s.q[j], s.q[i]
256}
Note: See TracBrowser for help on using the repository browser.