1 | // Copyright 2019 The Go Authors. All rights reserved.
|
---|
2 | // Use of this source code is governed by a BSD-style
|
---|
3 | // license that can be found in the LICENSE file.
|
---|
4 |
|
---|
5 | // Package strs provides string manipulation functionality specific to protobuf.
|
---|
6 | package strs
|
---|
7 |
|
---|
8 | import (
|
---|
9 | "go/token"
|
---|
10 | "strings"
|
---|
11 | "unicode"
|
---|
12 | "unicode/utf8"
|
---|
13 |
|
---|
14 | "google.golang.org/protobuf/internal/flags"
|
---|
15 | "google.golang.org/protobuf/reflect/protoreflect"
|
---|
16 | )
|
---|
17 |
|
---|
18 | // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
|
---|
19 | func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
|
---|
20 | if flags.ProtoLegacy {
|
---|
21 | if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
|
---|
22 | return fd.EnforceUTF8()
|
---|
23 | }
|
---|
24 | }
|
---|
25 | return fd.Syntax() == protoreflect.Proto3
|
---|
26 | }
|
---|
27 |
|
---|
28 | // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
|
---|
29 | //
|
---|
30 | // If there is an interior underscore followed by a lower case letter,
|
---|
31 | // drop the underscore and convert the letter to upper case.
|
---|
32 | func GoCamelCase(s string) string {
|
---|
33 | // Invariant: if the next letter is lower case, it must be converted
|
---|
34 | // to upper case.
|
---|
35 | // That is, we process a word at a time, where words are marked by _ or
|
---|
36 | // upper case letter. Digits are treated as words.
|
---|
37 | var b []byte
|
---|
38 | for i := 0; i < len(s); i++ {
|
---|
39 | c := s[i]
|
---|
40 | switch {
|
---|
41 | case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
|
---|
42 | // Skip over '.' in ".{{lowercase}}".
|
---|
43 | case c == '.':
|
---|
44 | b = append(b, '_') // convert '.' to '_'
|
---|
45 | case c == '_' && (i == 0 || s[i-1] == '.'):
|
---|
46 | // Convert initial '_' to ensure we start with a capital letter.
|
---|
47 | // Do the same for '_' after '.' to match historic behavior.
|
---|
48 | b = append(b, 'X') // convert '_' to 'X'
|
---|
49 | case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
|
---|
50 | // Skip over '_' in "_{{lowercase}}".
|
---|
51 | case isASCIIDigit(c):
|
---|
52 | b = append(b, c)
|
---|
53 | default:
|
---|
54 | // Assume we have a letter now - if not, it's a bogus identifier.
|
---|
55 | // The next word is a sequence of characters that must start upper case.
|
---|
56 | if isASCIILower(c) {
|
---|
57 | c -= 'a' - 'A' // convert lowercase to uppercase
|
---|
58 | }
|
---|
59 | b = append(b, c)
|
---|
60 |
|
---|
61 | // Accept lower case sequence that follows.
|
---|
62 | for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
|
---|
63 | b = append(b, s[i+1])
|
---|
64 | }
|
---|
65 | }
|
---|
66 | }
|
---|
67 | return string(b)
|
---|
68 | }
|
---|
69 |
|
---|
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced by '_'. A '_' is then prepended if the result is a Go keyword or
// does not begin with a Unicode letter; this includes the empty string and
// results that begin with '_' or a digit.
func GoSanitized(s string) string {
	// sanitize keeps letters and decimal digits and maps everything else
	// to '_'.
	sanitize := func(r rune) rune {
		switch {
		case unicode.IsLetter(r), unicode.IsDigit(r):
			return r
		default:
			return '_'
		}
	}
	ident := strings.Map(sanitize, s)

	// For an empty identifier DecodeRuneInString yields utf8.RuneError,
	// which is not a letter, so the prefix is added in that case too.
	first, _ := utf8.DecodeRuneInString(ident)
	needsPrefix := !unicode.IsLetter(first) || token.Lookup(ident).IsKeyword()
	if needsPrefix {
		return "_" + ident
	}
	return ident
}
|
---|
89 |
|
---|
90 | // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
|
---|
91 | // according to the protobuf JSON specification.
|
---|
92 | func JSONCamelCase(s string) string {
|
---|
93 | var b []byte
|
---|
94 | var wasUnderscore bool
|
---|
95 | for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
|
---|
96 | c := s[i]
|
---|
97 | if c != '_' {
|
---|
98 | if wasUnderscore && isASCIILower(c) {
|
---|
99 | c -= 'a' - 'A' // convert to uppercase
|
---|
100 | }
|
---|
101 | b = append(b, c)
|
---|
102 | }
|
---|
103 | wasUnderscore = c == '_'
|
---|
104 | }
|
---|
105 | return string(b)
|
---|
106 | }
|
---|
107 |
|
---|
108 | // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
|
---|
109 | // according to the protobuf JSON specification.
|
---|
110 | func JSONSnakeCase(s string) string {
|
---|
111 | var b []byte
|
---|
112 | for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
|
---|
113 | c := s[i]
|
---|
114 | if isASCIIUpper(c) {
|
---|
115 | b = append(b, '_')
|
---|
116 | c += 'a' - 'A' // convert to lowercase
|
---|
117 | }
|
---|
118 | b = append(b, c)
|
---|
119 | }
|
---|
120 | return string(b)
|
---|
121 | }
|
---|
122 |
|
---|
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is converted to upper case if it is an ASCII lower case letter,
// and "Entry" is appended to the result.
//
// The input is processed byte by byte with ASCII-only case conversion, so
// any non-ASCII bytes are copied through unchanged rather than being
// truncated to a single byte.
func MapEntryName(s string) string {
	const suffix = "Entry"
	b := make([]byte, 0, len(s)+len(suffix))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, suffix...)
	return string(b)
}
|
---|
142 |
|
---|
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte and every byte that follows an
// underscore is converted to ASCII upper case; every other byte is converted
// to ASCII lower case.
//
// The input is processed byte by byte with ASCII-only case conversion, so
// any non-ASCII bytes are copied through unchanged rather than being
// truncated to a single byte.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		if c == '_' {
			upperNext = true
			continue
		}
		switch {
		case upperNext && 'a' <= c && c <= 'z':
			c -= 'a' - 'A' // convert to uppercase
		case !upperNext && 'A' <= c && c <= 'Z':
			c += 'a' - 'A' // convert to lowercase
		}
		b = append(b, c)
		upperNext = false
	}
	return string(b)
}
|
---|
162 |
|
---|
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and each remaining byte of s
// is lower-cased before being compared with the next byte of prefix. The
// original s is returned unchanged when the prefix does not fully match or
// when nothing but underscores would remain after trimming.
func TrimEnumPrefix(s, prefix string) string {
	rest, want := s, prefix
	for rest != "" && want != "" {
		head := rest[0]
		rest = rest[1:]
		if head == '_' {
			continue // underscores never consume a prefix byte
		}
		if unicode.ToLower(rune(head)) != rune(want[0]) {
			return s // no prefix match
		}
		want = want[1:]
	}
	if want != "" {
		return s // s ran out before the whole prefix was matched
	}

	rest = strings.TrimLeft(rest, "_")
	if rest == "" {
		return s // avoid returning empty string
	}
	return rest
}
|
---|
187 |
|
---|
// isASCIILower reports whether c is an ASCII lower case letter ('a'-'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
|
---|
// isASCIIUpper reports whether c is an ASCII upper case letter ('A'-'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
|
---|
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'-'9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}
|
---|