source: code/trunk/iciba.go@ 6

Last change on this file since 6 was 6, checked in by fattalion, 3 years ago

Add iCIBA engine

File size: 9.4 KB
Line 
1package simplytranslate_engines
2
import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"time"
)
11
// ICIBAEngine is an engine that fetches data from https://www.iciba.com.
//
// The type carries no fields, so its zero value is ready to use.
type ICIBAEngine struct{}

// InternalName returns the machine-readable identifier of this engine,
// which is always "iciba".
func (*ICIBAEngine) InternalName() string { return "iciba" }

// DisplayName returns the human-readable name of this engine, which is
// always "iCIBA".
func (*ICIBAEngine) DisplayName() string { return "iCIBA" }
18
// icibaLanguages is the hard-coded list of languages known to the iCIBA
// engine.
//
// iCIBA does have an API for this, but it returns Chinese names. For
// languages already present in the Google Translate engine, the English
// names from that engine file are used; otherwise official names as
// researched on Wikipedia are used. They were validated against the Chinese
// names on a best-effort basis.
//
// "cni", "kbh" and "tmh" are missing because the ISO 639 table and the
// Chinese label conflict for those codes.
//
// A trailing "// not on Google" marks a language that is on iCIBA but not on
// Google; other trailing comments describe how the entry differs from the
// Google engine's.
var icibaLanguages = []Language{
	{Name: "Achinese", Code: "ace"},       // not on Google
	{Name: "Achuar-Shiwiar", Code: "acu"}, // not on Google
	{Name: "Afrikaans", Code: "af"},
	{Name: "Aguaruna", Code: "agr"}, // not on Google
	{Name: "Akawaio", Code: "ake"},  // not on Google
	{Name: "Albanian", Code: "sq"},
	{Name: "Amharic", Code: "am"},
	{Name: "Arabic", Code: "ar"},
	{Name: "Armenian", Code: "hy"},
	{Name: "Azerbaijani", Code: "az"},
	{Name: "Barasana-Eduria", Code: "bsn"}, // not on Google
	{Name: "Bashkir", Code: "ba"},          // not on Google
	{Name: "Basque", Code: "eu"},
	{Name: "Belarusian", Code: "be"},
	{Name: "Bemba", Code: "bem"}, // not on Google
	{Name: "Bengali", Code: "bn"},
	{Name: "Berber", Code: "ber"}, // not on Google
	{Name: "Bislama", Code: "bi"}, // not on Google
	{Name: "Bosnian", Code: "bs"},
	{Name: "Breton", Code: "br"}, // not on Google
	{Name: "Bulgarian", Code: "bg"},
	{Name: "Cabécar", Code: "cjp"}, // not on Google
	{Name: "Cantonese", Code: "yue"},
	{Name: "Catalan", Code: "ca"},
	{Name: "Cebuano", Code: "ceb"},
	{Name: "Chamorro", Code: "cha"}, // not on Google
	{Name: "Cherokee", Code: "chr"}, // not on Google
	{Name: "Chichewa", Code: "ny"},
	{Name: "Chinese (Simplified)", Code: "zh"},   // "zh-cn" on Google
	{Name: "Chinese (Traditional)", Code: "cht"}, // "zh-tw" on Google
	{Name: "Chuvash", Code: "cv"},
	{Name: "Coptic", Code: "cop"}, // not on Google
	{Name: "Corsican", Code: "co"},
	{Name: "Croatian", Code: "hr"},
	{Name: "Czech", Code: "cs"},
	{Name: "Danish", Code: "da"},
	{Name: "Dhivehi", Code: "dv"}, // not on Google
	{Name: "Dinka", Code: "dik"},  // not on Google
	{Name: "Dutch", Code: "nl"},
	{Name: "Dzongkha", Code: "dz"}, // not on Google
	{Name: "English", Code: "en"},
	{Name: "Esperanto", Code: "eo"},
	{Name: "Estonian", Code: "et"},
	{Name: "Ewe", Code: "ee"},       // not on Google
	{Name: "Faroese", Code: "fo"},   // not on Google
	{Name: "Fijian", Code: "fj"},    // not on Google
	{Name: "Filipino", Code: "fil"}, // "tl" on Google
	{Name: "Finnish", Code: "fi"},
	{Name: "French", Code: "fr"},
	{Name: "Frisian", Code: "fy"},
	{Name: "Galela", Code: "gbi"}, // not on Google
	{Name: "Galician", Code: "gl"},
	{Name: "Ganda", Code: "lg"},    // not on Google
	{Name: "Georgian", Code: "jy"}, // "ka" on Google
	{Name: "German", Code: "de"},
	{Name: "Greek", Code: "el"},
	{Name: "Guerrero Amuzgo", Code: "amu"}, // not on Google
	{Name: "Gujarati", Code: "gu"},
	{Name: "Haitian Creole", Code: "ht"},
	{Name: "Hausa", Code: "ha"},
	{Name: "Hawaiian", Code: "haw"},
	{Name: "Hebrew", Code: "he"}, // "iw" on Google
	{Name: "Hindi", Code: "hi"},
	{Name: "Hmong Daw", Code: "mww"}, // not on Google
	{Name: "Hmong", Code: "hmn"},     // not in iciba — NOTE(review): still listed here; confirm whether iCIBA accepts "hmn"
	{Name: "Hungarian", Code: "hu"},
	{Name: "Icelandic", Code: "is"},
	{Name: "Igbo", Code: "ig"},
	{Name: "Indonesian", Code: "id"},
	{Name: "Irish", Code: "ga"},
	{Name: "Italian", Code: "it"},
	{Name: "Jacalteco", Code: "jac"}, // not on Google
	{Name: "Japanese", Code: "ja"},
	{Name: "Javanese", Code: "jv"}, // "jw" on Google
	{Name: "Kabyle", Code: "kab"},  // not on Google
	{Name: "Kannada", Code: "kn"},
	{Name: "Kaqchikel", Code: "cak"},        // not on Google
	{Name: "Kazakh", Code: "ka"},            // Google only has "kk"
	{Name: "Kazakh (Cyrillic)", Code: "kk"}, // Google has it as just "Kazakh"
	{Name: "Kekchí", Code: "kek"},           // not on Google
	{Name: "Khmer", Code: "km"},
	{Name: "Kinyarwanda", Code: "rw"},
	{Name: "Kongo", Code: "kg"}, // not on Google
	{Name: "Korean", Code: "ko"},
	{Name: "Kurdish (Kurmanji)", Code: "ku"},
	{Name: "Kyrgyz", Code: "ky"},
	{Name: "Lao", Code: "lo"},
	{Name: "Latin", Code: "la"},
	{Name: "Latvian", Code: "lv"},
	{Name: "Lingala", Code: "ln"}, // not on Google
	{Name: "Lithuanian", Code: "lt"},
	{Name: "Lukpa", Code: "dop"}, // not on Google
	{Name: "Luxembourgish", Code: "lb"},
	{Name: "Macedonian", Code: "mk"},
	{Name: "Malagasy", Code: "mg"},
	{Name: "Malay", Code: "ms"},
	{Name: "Malayalam", Code: "ml"},
	{Name: "Maltese", Code: "mt"},
	{Name: "Mam", Code: "mam"}, // not on Google
	{Name: "Manx", Code: "gv"}, // not on Google
	{Name: "Maori", Code: "mi"},
	{Name: "Marathi", Code: "mr"},
	{Name: "Mari (Eastern)", Code: "mhr"}, // not on Google
	{Name: "Mari (Western)", Code: "mrj"}, // not on Google
	{Name: "Mongolian", Code: "mn"},
	{Name: "Montenegrin", Code: "me"}, // not on Google
	{Name: "Myanmar (Burmese)", Code: "my"},
	{Name: "Nahuatl", Code: "nhg"}, // not on Google
	{Name: "Ndyuka", Code: "djk"},  // not on Google
	{Name: "Nepali", Code: "ne"},
	{Name: "Norwegian", Code: "no"},
	{Name: "Odia (Oriya)", Code: "or"},
	{Name: "Ojibwa", Code: "ojb"},
	{Name: "Oromo", Code: "om"},       // not on Google
	{Name: "Ossetian", Code: "os"},    // not on Google
	{Name: "Paite", Code: "pck"},      // not on Google
	{Name: "Papiamento", Code: "pap"}, // not on Google
	{Name: "Pashto", Code: "ps"},
	{Name: "Persian", Code: "fa"},
	{Name: "Polish", Code: "pl"},
	{Name: "Portuguese", Code: "pt"},
	{Name: "Potawatomi", Code: "pot"}, // not on Google
	{Name: "Punjabi", Code: "pa"},
	{Name: "Querétaro Otomi", Code: "otq"},     // not on Google
	{Name: "Quiché", Code: "quc"},              // not on Google
	{Name: "Quichua", Code: "quw"},             // not on Google
	{Name: "Quiotepec Chinantec", Code: "chq"}, // not on Google
	{Name: "Romani", Code: "rmn"},              // not on Google
	{Name: "Romanian", Code: "ro"},
	{Name: "Rundi", Code: "rn"}, // not on Google
	{Name: "Russian", Code: "ru"},
	{Name: "Samoan", Code: "sm"},
	{Name: "Sango", Code: "sg"}, // not on Google
	{Name: "Scots Gaelic", Code: "gd"},
	{Name: "Serbian", Code: "sr"},
	{Name: "Seselwa Creole French", Code: "crs"}, // not on Google
	{Name: "Sesotho", Code: "st"},
	{Name: "Shona", Code: "sn"},
	{Name: "Shuar", Code: "jiv"}, // not on Google
	{Name: "Sindhi", Code: "sd"},
	{Name: "Sinhala", Code: "si"},
	{Name: "Slovak", Code: "sk"},
	{Name: "Slovenian", Code: "sl"},
	{Name: "Somali", Code: "so"},
	{Name: "Spanish", Code: "es"},
	{Name: "Sundanese", Code: "su"},
	{Name: "Swahili", Code: "sw"},
	{Name: "Swedish", Code: "sv"},
	{Name: "Syriac", Code: "syc"},    // considered "extinct" but is somehow supported
	{Name: "Tachelhit", Code: "shi"}, // not on Google
	{Name: "Tahitian", Code: "ty"},   // not on Google
	{Name: "Tajik", Code: "tg"},
	{Name: "Tamil", Code: "ta"},
	{Name: "Tatar", Code: "tt"},
	{Name: "Telugu", Code: "te"},
	{Name: "Tetum", Code: "tet"}, // not on Google
	{Name: "Thai", Code: "th"},
	{Name: "Tigre", Code: "ti"},      // not on Google
	{Name: "Tiwi", Code: "tw"},       // not on Google
	{Name: "Tok Pisin", Code: "tpi"}, // not on Google
	{Name: "Tonga", Code: "to"},      // not on Google
	{Name: "Tsonga", Code: "ts"},
	{Name: "Tswana", Code: "tn"}, // not on Google
	{Name: "Turkish", Code: "tr"},
	{Name: "Turkmen", Code: "tk"},
	{Name: "Udmurt", Code: "udm"}, // not on Google
	{Name: "Ukrainian", Code: "uk"},
	{Name: "Uma", Code: "ppk"}, // not on Google
	{Name: "Urdu", Code: "ur"},
	{Name: "Uspanteco", Code: "usp"}, // not on Google
	{Name: "Uyghur", Code: "uy"},     // "ug" on Google
	{Name: "Uzbek", Code: "uz"},
	{Name: "Venda", Code: "ve"}, // not on Google
	{Name: "Vietnamese", Code: "vi"},
	{Name: "Waray", Code: "war"}, // not on Google
	{Name: "Welsh", Code: "cy"},
	{Name: "Wolaitta", Code: "wal"}, // not on Google
	{Name: "Wolof", Code: "wol"},
	{Name: "Xhosa", Code: "xh"},
	{Name: "Yiddish", Code: "yi"},
	{Name: "Yoruba", Code: "yo"},
	{Name: "Yucatán Maya", Code: "yua"}, // not on Google
	{Name: "Zarma", Code: "dje"},        // not on Google
	{Name: "Zulu", Code: "zu"},
}
213
214func (_ *ICIBAEngine) SourceLanguages() ([]Language, error) { return icibaLanguages, nil }
215
216func (_ *ICIBAEngine) TargetLanguages() ([]Language, error) { return icibaLanguages, nil }
217
218func (_ *ICIBAEngine) SupportsAutodetect() bool { return true }
219
220func (_ *ICIBAEngine) DetectLanguage(text string) (Language, error) { return Language{}, nil }
221
// icibaTranslateContent is the object found under the "content" key of a
// translation response.
type icibaTranslateContent struct {
	// From is the source-language code reported by the server.
	From string `json:"from"`
	// Out is the translated text.
	Out string `json:"out"`
}

// icibaTranslateResponse models the part of the translation endpoint's JSON
// reply that this package decodes; any other keys are ignored by the decoder.
type icibaTranslateResponse struct {
	Content icibaTranslateContent `json:"content"`
}
228
229func (_ *ICIBAEngine) Translate(text string, from Language, to Language) (TranslationResult, error) {
230 requestURL, err := url.Parse("https://ifanyi.iciba.com/index.php")
231
232 if err != nil {
233 // The URL is constant, so it should never fail.
234 panic(err)
235 }
236
237 query := url.Values{}
238 query.Add("c", "trans")
239 query.Add("m", "fy")
240 query.Add("client", "6")
241 query.Add("auth_user", "key_ciba")
242
243 sum := md5.Sum([]byte(("6key_cibaifanyicjbysdlove1" + text)))
244
245 query.Add("sign", hex.EncodeToString(sum[:])[:16])
246
247 requestURL.RawQuery = query.Encode()
248
249 formData := url.Values{}
250 formData.Add("from", from.Code)
251 formData.Add("to", to.Code)
252 formData.Add("q", text)
253
254 response, err := http.PostForm(requestURL.String(), formData)
255
256 if err != nil {
257 return TranslationResult{}, err
258 }
259
260 defer response.Body.Close()
261
262 if response.StatusCode != 200 {
263 return TranslationResult{}, fmt.Errorf("got status code %d from iCIBA", response.StatusCode)
264 }
265
266 var responseJSON icibaTranslateResponse
267
268 if err := json.NewDecoder(response.Body).Decode(&responseJSON); err != nil {
269 return TranslationResult{}, err
270 }
271
272 var sourceLanguage Language
273
274 for _, lang := range icibaLanguages {
275 if lang.Code == responseJSON.Content.From {
276 sourceLanguage = lang
277 break
278 }
279 }
280
281 if sourceLanguage == (Language{}) {
282 return TranslationResult{SourceLanguage: from, TranslatedText: responseJSON.Content.Out},
283 fmt.Errorf("language code \"%s\" is not in iCIBA's language list", responseJSON.Content.From)
284 }
285
286 return TranslationResult{
287 SourceLanguage: sourceLanguage,
288 TranslatedText: responseJSON.Content.Out,
289 }, nil
290}
Note: See TracBrowser for help on using the repository browser.