source: code/trunk/engines/iciba.go@ 10

Last change on this file since 10 was 10, checked in by fattalion, 3 years ago

Make some identifiers more concise

See https://go.dev/doc/effective_go#package-names, specifically:

The importer of a package will use the name to refer to its contents,
so exported names in the package can use that fact to avoid
repetition.

For example, engines.GoogleTranslateEngine needlessly repeats
"engine," so just get rid of that duplication by renaming it to
engines.GoogleTranslate.

Renaming engines.TranslationEngine to engines.Engine may be
debatable, so if somebody disagrees, feel free to leave a comment
stating your disagreement and with an explanation of why you disagree.

File size: 9.3 KB
Line 
1package engines
2
import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"time"
)
11
// ICIBA is the engine that fetches translations from https://www.iciba.com.
// The type has no fields, so its zero value is ready to use.
type ICIBA struct{}

// InternalName returns the identifier "iciba" for this engine.
func (*ICIBA) InternalName() string {
	return "iciba"
}

// DisplayName returns the human-readable engine name "iCIBA".
func (*ICIBA) DisplayName() string {
	return "iCIBA"
}
18
// icibaLanguages is the table of languages used with iCIBA. Each entry
// pairs an English display name with the language code sent to the service.
var icibaLanguages = []Language{
	// ICIBA does have an API for its language list, but it returns Chinese
	// names. For languages already present in Google Translate, the English
	// names from that engine file are used; otherwise the official names as
	// researched on Wikipedia are used. They were validated against the
	// Chinese names to the best of the author's ability.
	//
	// "cni", "kbh" and "tmh" are missing because the ISO 639 table and the
	// Chinese label conflict.
	//
	// A bare trailing "//" marks a language that is on iCIBA but not on
	// Google.
	{Name: "Achinese", Code: "ace"},       //
	{Name: "Achuar-Shiwiar", Code: "acu"}, //
	{Name: "Afrikaans", Code: "af"},
	{Name: "Aguaruna", Code: "agr"}, //
	{Name: "Akawaio", Code: "ake"},  //
	{Name: "Albanian", Code: "sq"},
	{Name: "Amharic", Code: "am"},
	{Name: "Arabic", Code: "ar"},
	{Name: "Armenian", Code: "hy"},
	{Name: "Azerbaijani", Code: "az"},
	{Name: "Barasana-Eduria", Code: "bsn"}, //
	{Name: "Bashkir", Code: "ba"},          //
	{Name: "Basque", Code: "eu"},
	{Name: "Belarusian", Code: "be"},
	{Name: "Bemba", Code: "bem"}, //
	{Name: "Bengali", Code: "bn"},
	{Name: "Berber", Code: "ber"}, //
	{Name: "Bislama", Code: "bi"}, //
	{Name: "Bosnian", Code: "bs"},
	{Name: "Breton", Code: "br"}, //
	{Name: "Bulgarian", Code: "bg"},
	{Name: "Cabécar", Code: "cjp"}, //
	{Name: "Cantonese", Code: "yue"},
	{Name: "Catalan", Code: "ca"},
	{Name: "Cebuano", Code: "ceb"},
	{Name: "Chamorro", Code: "cha"}, //
	{Name: "Cherokee", Code: "chr"}, //
	{Name: "Chichewa", Code: "ny"},
	{Name: "Chinese (Simplified)", Code: "zh"},   // "zh-cn" on Google
	{Name: "Chinese (Traditional)", Code: "cht"}, // "zh-tw" on Google
	{Name: "Chuvash", Code: "cv"},
	{Name: "Coptic", Code: "cop"}, //
	{Name: "Corsican", Code: "co"},
	{Name: "Croatian", Code: "hr"},
	{Name: "Czech", Code: "cs"},
	{Name: "Danish", Code: "da"},
	{Name: "Dhivehi", Code: "dv"}, //
	{Name: "Dinka", Code: "dik"},  //
	{Name: "Dutch", Code: "nl"},
	{Name: "Dzongkha", Code: "dz"}, //
	{Name: "English", Code: "en"},
	{Name: "Esperanto", Code: "eo"},
	{Name: "Estonian", Code: "et"},
	{Name: "Ewe", Code: "ee"},       //
	{Name: "Faroese", Code: "fo"},   //
	{Name: "Fijian", Code: "fj"},    //
	{Name: "Filipino", Code: "fil"}, // "tl" on Google
	{Name: "Finnish", Code: "fi"},
	{Name: "French", Code: "fr"},
	{Name: "Frisian", Code: "fy"},
	{Name: "Galela", Code: "gbi"}, //
	{Name: "Galician", Code: "gl"},
	{Name: "Ganda", Code: "lg"},    //
	{Name: "Georgian", Code: "jy"}, // "ka" on Google
	{Name: "German", Code: "de"},
	{Name: "Greek", Code: "el"},
	{Name: "Guerrero Amuzgo", Code: "amu"}, //
	{Name: "Gujarati", Code: "gu"},
	{Name: "Haitian Creole", Code: "ht"},
	{Name: "Hausa", Code: "ha"},
	{Name: "Hawaiian", Code: "haw"},
	{Name: "Hebrew", Code: "he"}, // "iw" on Google
	{Name: "Hindi", Code: "hi"},
	{Name: "Hmong Daw", Code: "mww"}, //
	{Name: "Hmong", Code: "hmn"},     // not in iciba — NOTE(review): listed here despite this remark; confirm
	{Name: "Hungarian", Code: "hu"},
	{Name: "Icelandic", Code: "is"},
	{Name: "Igbo", Code: "ig"},
	{Name: "Indonesian", Code: "id"},
	{Name: "Irish", Code: "ga"},
	{Name: "Italian", Code: "it"},
	{Name: "Jacalteco", Code: "jac"}, //
	{Name: "Japanese", Code: "ja"},
	{Name: "Javanese", Code: "jv"}, // "jw" on Google
	{Name: "Kabyle", Code: "kab"},  //
	{Name: "Kannada", Code: "kn"},
	{Name: "Kaqchikel", Code: "cak"},        //
	{Name: "Kazakh", Code: "ka"},            // Google only has "kk"
	{Name: "Kazakh (Cyrillic)", Code: "kk"}, // Google has it as just "Kazakh"
	{Name: "Kekchí", Code: "kek"},           //
	{Name: "Khmer", Code: "km"},
	{Name: "Kinyarwanda", Code: "rw"},
	{Name: "Kongo", Code: "kg"}, //
	{Name: "Korean", Code: "ko"},
	{Name: "Kurdish (Kurmanji)", Code: "ku"},
	{Name: "Kyrgyz", Code: "ky"},
	{Name: "Lao", Code: "lo"},
	{Name: "Latin", Code: "la"},
	{Name: "Latvian", Code: "lv"},
	{Name: "Lingala", Code: "ln"}, //
	{Name: "Lithuanian", Code: "lt"},
	{Name: "Lukpa", Code: "dop"}, //
	{Name: "Luxembourgish", Code: "lb"},
	{Name: "Macedonian", Code: "mk"},
	{Name: "Malagasy", Code: "mg"},
	{Name: "Malay", Code: "ms"},
	{Name: "Malayalam", Code: "ml"},
	{Name: "Maltese", Code: "mt"},
	{Name: "Mam", Code: "mam"}, //
	{Name: "Manx", Code: "gv"}, //
	{Name: "Maori", Code: "mi"},
	{Name: "Marathi", Code: "mr"},
	{Name: "Mari (Eastern)", Code: "mhr"}, //
	{Name: "Mari (Western)", Code: "mrj"}, //
	{Name: "Mongolian", Code: "mn"},
	{Name: "Montenegrin", Code: "me"}, //
	{Name: "Myanmar (Burmese)", Code: "my"},
	{Name: "Nahuatl", Code: "nhg"}, //
	{Name: "Ndyuka", Code: "djk"},  //
	{Name: "Nepali", Code: "ne"},
	{Name: "Norwegian", Code: "no"},
	{Name: "Odia (Oriya)", Code: "or"},
	{Name: "Ojibwa", Code: "ojb"},
	{Name: "Oromo", Code: "om"},       //
	{Name: "Ossetian", Code: "os"},    //
	{Name: "Paite", Code: "pck"},      //
	{Name: "Papiamento", Code: "pap"}, //
	{Name: "Pashto", Code: "ps"},
	{Name: "Persian", Code: "fa"},
	{Name: "Polish", Code: "pl"},
	{Name: "Portuguese", Code: "pt"},
	{Name: "Potawatomi", Code: "pot"}, //
	{Name: "Punjabi", Code: "pa"},
	{Name: "Querétaro Otomi", Code: "otq"},     //
	{Name: "Quiché", Code: "quc"},              //
	{Name: "Quichua", Code: "quw"},             //
	{Name: "Quiotepec Chinantec", Code: "chq"}, //
	{Name: "Romani", Code: "rmn"},              //
	{Name: "Romanian", Code: "ro"},
	{Name: "Rundi", Code: "rn"}, //
	{Name: "Russian", Code: "ru"},
	{Name: "Samoan", Code: "sm"},
	{Name: "Sango", Code: "sg"}, //
	{Name: "Scots Gaelic", Code: "gd"},
	{Name: "Serbian", Code: "sr"},
	{Name: "Seselwa Creole French", Code: "crs"}, //
	{Name: "Sesotho", Code: "st"},
	{Name: "Shona", Code: "sn"},
	{Name: "Shuar", Code: "jiv"}, //
	{Name: "Sindhi", Code: "sd"},
	{Name: "Sinhala", Code: "si"},
	{Name: "Slovak", Code: "sk"},
	{Name: "Slovenian", Code: "sl"},
	{Name: "Somali", Code: "so"},
	{Name: "Spanish", Code: "es"},
	{Name: "Sundanese", Code: "su"},
	{Name: "Swahili", Code: "sw"},
	{Name: "Swedish", Code: "sv"},
	{Name: "Syriac", Code: "syc"},    // considered "extinct" but is somehow supported
	{Name: "Tachelhit", Code: "shi"}, //
	{Name: "Tahitian", Code: "ty"},   //
	{Name: "Tajik", Code: "tg"},
	{Name: "Tamil", Code: "ta"},
	{Name: "Tatar", Code: "tt"},
	{Name: "Telugu", Code: "te"},
	{Name: "Tetum", Code: "tet"}, //
	{Name: "Thai", Code: "th"},
	{Name: "Tigre", Code: "ti"},      // NOTE(review): "ti" is the ISO 639-1 code for Tigrinya; confirm the name
	{Name: "Tiwi", Code: "tw"},       // NOTE(review): "tw" is the ISO 639-1 code for Twi; confirm the name
	{Name: "Tok Pisin", Code: "tpi"}, //
	{Name: "Tonga", Code: "to"},      //
	{Name: "Tsonga", Code: "ts"},
	{Name: "Tswana", Code: "tn"}, //
	{Name: "Turkish", Code: "tr"},
	{Name: "Turkmen", Code: "tk"},
	{Name: "Udmurt", Code: "udm"}, //
	{Name: "Ukrainian", Code: "uk"},
	{Name: "Uma", Code: "ppk"}, //
	{Name: "Urdu", Code: "ur"},
	{Name: "Uspanteco", Code: "usp"}, //
	{Name: "Uyghur", Code: "uy"},     // "ug" on Google
	{Name: "Uzbek", Code: "uz"},
	{Name: "Venda", Code: "ve"}, //
	{Name: "Vietnamese", Code: "vi"},
	{Name: "Waray", Code: "war"}, //
	{Name: "Welsh", Code: "cy"},
	{Name: "Wolaitta", Code: "wal"}, //
	{Name: "Wolof", Code: "wol"},
	{Name: "Xhosa", Code: "xh"},
	{Name: "Yiddish", Code: "yi"},
	{Name: "Yoruba", Code: "yo"},
	{Name: "Yucatán Maya", Code: "yua"}, //
	{Name: "Zarma", Code: "dje"},        //
	{Name: "Zulu", Code: "zu"},
}
213
214func (_ *ICIBA) SourceLanguages() ([]Language, error) { return icibaLanguages, nil }
215
216func (_ *ICIBA) TargetLanguages() ([]Language, error) { return icibaLanguages, nil }
217
218func (_ *ICIBA) SupportsAutodetect() bool { return true }
219
220func (_ *ICIBA) DetectLanguage(text string) (Language, error) { return Language{}, nil }
221
// icibaTranslateContent is the object found under the "content" key of a
// translate response.
type icibaTranslateContent struct {
	// From is decoded from the "from" key.
	From string `json:"from"`
	// Out is decoded from the "out" key.
	Out string `json:"out"`
}

// icibaTranslateResponse holds the parts of a translate response body that
// are decoded from JSON; any other keys are left undecoded.
type icibaTranslateResponse struct {
	Content icibaTranslateContent `json:"content"`
}
228
229func (_ *ICIBA) Translate(text string, from Language, to Language) (TranslationResult, error) {
230 requestURL, err := url.Parse("https://ifanyi.iciba.com/index.php")
231
232 if err != nil {
233 // The URL is constant, so it should never fail.
234 panic(err)
235 }
236
237 query := url.Values{}
238 query.Add("c", "trans")
239 query.Add("m", "fy")
240 query.Add("client", "6")
241 query.Add("auth_user", "key_web_fanyi")
242
243 sum := md5.Sum([]byte(("6key_web_fanyiifanyiweb8hc9s98e" + text)))
244
245 query.Add("sign", hex.EncodeToString(sum[:])[:16])
246
247 requestURL.RawQuery = query.Encode()
248
249 formData := url.Values{}
250 formData.Add("from", from.Code)
251 formData.Add("to", to.Code)
252 formData.Add("q", text)
253
254 response, err := http.PostForm(requestURL.String(), formData)
255
256 if err != nil {
257 return TranslationResult{}, err
258 }
259
260 defer response.Body.Close()
261
262 if response.StatusCode != 200 {
263 return TranslationResult{}, fmt.Errorf("got status code %d from iCIBA", response.StatusCode)
264 }
265
266 var responseJSON icibaTranslateResponse
267
268 if err := json.NewDecoder(response.Body).Decode(&responseJSON); err != nil {
269 return TranslationResult{}, err
270 }
271
272 var sourceLanguage Language
273
274 for _, lang := range icibaLanguages {
275 if lang.Code == responseJSON.Content.From {
276 sourceLanguage = lang
277 break
278 }
279 }
280
281 if sourceLanguage == (Language{}) {
282 return TranslationResult{SourceLanguage: from, TranslatedText: responseJSON.Content.Out},
283 fmt.Errorf("language code \"%s\" is not in iCIBA's language list", responseJSON.Content.From)
284 }
285
286 return TranslationResult{
287 SourceLanguage: sourceLanguage,
288 TranslatedText: responseJSON.Content.Out,
289 }, nil
290}
Note: See TracBrowser for help on using the repository browser.