[6] | 1 | package simplytranslate_engines
|
---|
| 2 |
|
---|
| 3 | import (
|
---|
| 4 | "crypto/md5"
|
---|
| 5 | "encoding/hex"
|
---|
| 6 | "encoding/json"
|
---|
| 7 | "fmt"
|
---|
| 8 | "net/http"
|
---|
| 9 | "net/url"
|
---|
| 10 | )
|
---|
| 11 |
|
---|
// ICIBAEngine is an engine that fetches data from https://www.iciba.com.
// It carries no state, so its zero value is ready to use.
type ICIBAEngine struct{}

// InternalName returns the engine's internal identifier, "iciba".
func (*ICIBAEngine) InternalName() string {
	return "iciba"
}

// DisplayName returns the engine's display name, "iCIBA".
func (*ICIBAEngine) DisplayName() string {
	return "iCIBA"
}
| 18 |
|
---|
// icibaLanguages is the language table of the iCIBA engine. It is returned
// as both the source and the target language list, and Translate matches the
// language code reported by the service against its Code fields.
//
// ICIBA does have an API for this list, but it returns Chinese names. For
// languages already present in Google Translate, the English names from that
// engine file are used; otherwise official names as researched on Wikipedia
// are used. They were validated against the Chinese names on a best-effort
// basis.
//
// "cni", "kbh" and "tmh" are missing because of conflicts between the
// ISO 639 table and the Chinese labels.
//
// A bare trailing "//" marks a language that is on iCIBA but not on Google.
var icibaLanguages = []Language{
	{Name: "Achinese", Code: "ace"}, //
	{Name: "Achuar-Shiwiar", Code: "acu"}, //
	{Name: "Afrikaans", Code: "af"},
	{Name: "Aguaruna", Code: "agr"}, //
	{Name: "Akawaio", Code: "ake"}, //
	{Name: "Albanian", Code: "sq"},
	{Name: "Amharic", Code: "am"},
	{Name: "Arabic", Code: "ar"},
	{Name: "Armenian", Code: "hy"},
	{Name: "Azerbaijani", Code: "az"},
	{Name: "Barasana-Eduria", Code: "bsn"}, //
	{Name: "Bashkir", Code: "ba"}, //
	{Name: "Basque", Code: "eu"},
	{Name: "Belarusian", Code: "be"},
	{Name: "Bemba", Code: "bem"}, //
	{Name: "Bengali", Code: "bn"},
	{Name: "Berber", Code: "ber"}, //
	{Name: "Bislama", Code: "bi"}, //
	{Name: "Bosnian", Code: "bs"},
	{Name: "Breton", Code: "br"}, //
	{Name: "Bulgarian", Code: "bg"},
	{Name: "Cabécar", Code: "cjp"}, //
	{Name: "Cantonese", Code: "yue"},
	{Name: "Catalan", Code: "ca"},
	{Name: "Cebuano", Code: "ceb"},
	{Name: "Chamorro", Code: "cha"}, //
	{Name: "Cherokee", Code: "chr"}, //
	{Name: "Chichewa", Code: "ny"},
	{Name: "Chinese (Simplified)", Code: "zh"}, // "zh-cn" on Google
	{Name: "Chinese (Traditional)", Code: "cht"}, // "zh-tw" on Google
	{Name: "Chuvash", Code: "cv"},
	{Name: "Coptic", Code: "cop"}, //
	{Name: "Corsican", Code: "co"},
	{Name: "Croatian", Code: "hr"},
	{Name: "Czech", Code: "cs"},
	{Name: "Danish", Code: "da"},
	{Name: "Dhivehi", Code: "dv"}, //
	{Name: "Dinka", Code: "dik"}, //
	{Name: "Dutch", Code: "nl"},
	{Name: "Dzongkha", Code: "dz"}, //
	{Name: "English", Code: "en"},
	{Name: "Esperanto", Code: "eo"},
	{Name: "Estonian", Code: "et"},
	{Name: "Ewe", Code: "ee"}, //
	{Name: "Faroese", Code: "fo"}, //
	{Name: "Fijian", Code: "fj"}, //
	{Name: "Filipino", Code: "fil"}, // "tl" on Google
	{Name: "Finnish", Code: "fi"},
	{Name: "French", Code: "fr"},
	{Name: "Frisian", Code: "fy"},
	{Name: "Galela", Code: "gbi"}, //
	{Name: "Galician", Code: "gl"},
	{Name: "Ganda", Code: "lg"}, //
	{Name: "Georgian", Code: "jy"}, // "ka" on Google
	{Name: "German", Code: "de"},
	{Name: "Greek", Code: "el"},
	{Name: "Guerrero Amuzgo", Code: "amu"}, //
	{Name: "Gujarati", Code: "gu"},
	{Name: "Haitian Creole", Code: "ht"},
	{Name: "Hausa", Code: "ha"},
	{Name: "Hawaiian", Code: "haw"},
	{Name: "Hebrew", Code: "he"}, // "iw" on Google
	{Name: "Hindi", Code: "hi"},
	{Name: "Hmong Daw", Code: "mww"}, //
	{Name: "Hmong", Code: "hmn"}, // not in iciba
	{Name: "Hungarian", Code: "hu"},
	{Name: "Icelandic", Code: "is"},
	{Name: "Igbo", Code: "ig"},
	{Name: "Indonesian", Code: "id"},
	{Name: "Irish", Code: "ga"},
	{Name: "Italian", Code: "it"},
	{Name: "Jacalteco", Code: "jac"}, //
	{Name: "Japanese", Code: "ja"},
	{Name: "Javanese", Code: "jv"}, // "jw" on Google
	{Name: "Kabyle", Code: "kab"}, //
	{Name: "Kannada", Code: "kn"},
	{Name: "Kaqchikel", Code: "cak"}, //
	{Name: "Kazakh", Code: "ka"}, // Google only has "kk"
	{Name: "Kazakh (Cyrillic)", Code: "kk"}, // Google has it as just "Kazakh"
	{Name: "Kekchí", Code: "kek"}, //
	{Name: "Khmer", Code: "km"},
	{Name: "Kinyarwanda", Code: "rw"},
	{Name: "Kongo", Code: "kg"}, //
	{Name: "Korean", Code: "ko"},
	{Name: "Kurdish (Kurmanji)", Code: "ku"},
	{Name: "Kyrgyz", Code: "ky"},
	{Name: "Lao", Code: "lo"},
	{Name: "Latin", Code: "la"},
	{Name: "Latvian", Code: "lv"},
	{Name: "Lingala", Code: "ln"}, //
	{Name: "Lithuanian", Code: "lt"},
	{Name: "Lukpa", Code: "dop"}, //
	{Name: "Luxembourgish", Code: "lb"},
	{Name: "Macedonian", Code: "mk"},
	{Name: "Malagasy", Code: "mg"},
	{Name: "Malay", Code: "ms"},
	{Name: "Malayalam", Code: "ml"},
	{Name: "Maltese", Code: "mt"},
	{Name: "Mam", Code: "mam"}, //
	{Name: "Manx", Code: "gv"}, //
	{Name: "Maori", Code: "mi"},
	{Name: "Marathi", Code: "mr"},
	{Name: "Mari (Eastern)", Code: "mhr"}, //
	{Name: "Mari (Western)", Code: "mrj"}, //
	{Name: "Mongolian", Code: "mn"},
	{Name: "Montenegrin", Code: "me"}, //
	{Name: "Myanmar (Burmese)", Code: "my"},
	{Name: "Nahuatl", Code: "nhg"}, //
	{Name: "Ndyuka", Code: "djk"}, //
	{Name: "Nepali", Code: "ne"},
	{Name: "Norwegian", Code: "no"},
	{Name: "Odia (Oriya)", Code: "or"},
	{Name: "Ojibwa", Code: "ojb"},
	{Name: "Oromo", Code: "om"}, //
	{Name: "Ossetian", Code: "os"}, //
	{Name: "Paite", Code: "pck"}, //
	{Name: "Papiamento", Code: "pap"}, //
	{Name: "Pashto", Code: "ps"},
	{Name: "Persian", Code: "fa"},
	{Name: "Polish", Code: "pl"},
	{Name: "Portuguese", Code: "pt"},
	{Name: "Potawatomi", Code: "pot"}, //
	{Name: "Punjabi", Code: "pa"},
	{Name: "Querétaro Otomi", Code: "otq"}, //
	{Name: "Quiché", Code: "quc"}, //
	{Name: "Quichua", Code: "quw"}, //
	{Name: "Quiotepec Chinantec", Code: "chq"}, //
	{Name: "Romani", Code: "rmn"}, //
	{Name: "Romanian", Code: "ro"},
	{Name: "Rundi", Code: "rn"}, //
	{Name: "Russian", Code: "ru"},
	{Name: "Samoan", Code: "sm"},
	{Name: "Sango", Code: "sg"}, //
	{Name: "Scots Gaelic", Code: "gd"},
	{Name: "Serbian", Code: "sr"},
	{Name: "Seselwa Creole French", Code: "crs"}, //
	{Name: "Sesotho", Code: "st"},
	{Name: "Shona", Code: "sn"},
	{Name: "Shuar", Code: "jiv"}, //
	{Name: "Sindhi", Code: "sd"},
	{Name: "Sinhala", Code: "si"},
	{Name: "Slovak", Code: "sk"},
	{Name: "Slovenian", Code: "sl"},
	{Name: "Somali", Code: "so"},
	{Name: "Spanish", Code: "es"},
	{Name: "Sundanese", Code: "su"},
	{Name: "Swahili", Code: "sw"},
	{Name: "Swedish", Code: "sv"},
	{Name: "Syriac", Code: "syc"}, // considered "extinct" but is somehow supported
	{Name: "Tachelhit", Code: "shi"}, //
	{Name: "Tahitian", Code: "ty"}, //
	{Name: "Tajik", Code: "tg"},
	{Name: "Tamil", Code: "ta"},
	{Name: "Tatar", Code: "tt"},
	{Name: "Telugu", Code: "te"},
	{Name: "Tetum", Code: "tet"}, //
	{Name: "Thai", Code: "th"},
	{Name: "Tigre", Code: "ti"}, //
	{Name: "Tiwi", Code: "tw"}, //
	{Name: "Tok Pisin", Code: "tpi"}, //
	{Name: "Tonga", Code: "to"}, //
	{Name: "Tsonga", Code: "ts"},
	{Name: "Tswana", Code: "tn"}, //
	{Name: "Turkish", Code: "tr"},
	{Name: "Turkmen", Code: "tk"},
	{Name: "Udmurt", Code: "udm"}, //
	{Name: "Ukrainian", Code: "uk"},
	{Name: "Uma", Code: "ppk"}, //
	{Name: "Urdu", Code: "ur"},
	{Name: "Uspanteco", Code: "usp"}, //
	{Name: "Uyghur", Code: "uy"}, // "ug" on Google
	{Name: "Uzbek", Code: "uz"},
	{Name: "Venda", Code: "ve"}, //
	{Name: "Vietnamese", Code: "vi"},
	{Name: "Waray", Code: "war"}, //
	{Name: "Welsh", Code: "cy"},
	{Name: "Wolaitta", Code: "wal"}, //
	{Name: "Wolof", Code: "wol"},
	{Name: "Xhosa", Code: "xh"},
	{Name: "Yiddish", Code: "yi"},
	{Name: "Yoruba", Code: "yo"},
	{Name: "Yucatán Maya", Code: "yua"}, //
	{Name: "Zarma", Code: "dje"}, //
	{Name: "Zulu", Code: "zu"},
}
| 213 |
|
---|
| 214 | func (_ *ICIBAEngine) SourceLanguages() ([]Language, error) { return icibaLanguages, nil }
|
---|
| 215 |
|
---|
| 216 | func (_ *ICIBAEngine) TargetLanguages() ([]Language, error) { return icibaLanguages, nil }
|
---|
| 217 |
|
---|
| 218 | func (_ *ICIBAEngine) SupportsAutodetect() bool { return true }
|
---|
| 219 |
|
---|
| 220 | func (_ *ICIBAEngine) DetectLanguage(text string) (Language, error) { return Language{}, nil }
|
---|
| 221 |
|
---|
// icibaTranslateContent is the "content" object of an iCIBA translation
// response.
type icibaTranslateContent struct {
	// From is decoded from the "from" key.
	From string `json:"from"`
	// Out is decoded from the "out" key.
	Out string `json:"out"`
}

// icibaTranslateResponse is the subset of the iCIBA translation response
// body that is decoded from JSON; only the "content" object is read.
type icibaTranslateResponse struct {
	Content icibaTranslateContent `json:"content"`
}
| 228 |
|
---|
| 229 | func (_ *ICIBAEngine) Translate(text string, from Language, to Language) (TranslationResult, error) {
|
---|
| 230 | requestURL, err := url.Parse("https://ifanyi.iciba.com/index.php")
|
---|
| 231 |
|
---|
| 232 | if err != nil {
|
---|
| 233 | // The URL is constant, so it should never fail.
|
---|
| 234 | panic(err)
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | query := url.Values{}
|
---|
| 238 | query.Add("c", "trans")
|
---|
| 239 | query.Add("m", "fy")
|
---|
| 240 | query.Add("client", "6")
|
---|
| 241 | query.Add("auth_user", "key_ciba")
|
---|
| 242 |
|
---|
| 243 | sum := md5.Sum([]byte(("6key_cibaifanyicjbysdlove1" + text)))
|
---|
| 244 |
|
---|
| 245 | query.Add("sign", hex.EncodeToString(sum[:])[:16])
|
---|
| 246 |
|
---|
| 247 | requestURL.RawQuery = query.Encode()
|
---|
| 248 |
|
---|
| 249 | formData := url.Values{}
|
---|
| 250 | formData.Add("from", from.Code)
|
---|
| 251 | formData.Add("to", to.Code)
|
---|
| 252 | formData.Add("q", text)
|
---|
| 253 |
|
---|
| 254 | response, err := http.PostForm(requestURL.String(), formData)
|
---|
| 255 |
|
---|
| 256 | if err != nil {
|
---|
| 257 | return TranslationResult{}, err
|
---|
| 258 | }
|
---|
| 259 |
|
---|
| 260 | defer response.Body.Close()
|
---|
| 261 |
|
---|
| 262 | if response.StatusCode != 200 {
|
---|
| 263 | return TranslationResult{}, fmt.Errorf("got status code %d from iCIBA", response.StatusCode)
|
---|
| 264 | }
|
---|
| 265 |
|
---|
| 266 | var responseJSON icibaTranslateResponse
|
---|
| 267 |
|
---|
| 268 | if err := json.NewDecoder(response.Body).Decode(&responseJSON); err != nil {
|
---|
| 269 | return TranslationResult{}, err
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 | var sourceLanguage Language
|
---|
| 273 |
|
---|
| 274 | for _, lang := range icibaLanguages {
|
---|
| 275 | if lang.Code == responseJSON.Content.From {
|
---|
| 276 | sourceLanguage = lang
|
---|
| 277 | break
|
---|
| 278 | }
|
---|
| 279 | }
|
---|
| 280 |
|
---|
| 281 | if sourceLanguage == (Language{}) {
|
---|
| 282 | return TranslationResult{SourceLanguage: from, TranslatedText: responseJSON.Content.Out},
|
---|
| 283 | fmt.Errorf("language code \"%s\" is not in iCIBA's language list", responseJSON.Content.From)
|
---|
| 284 | }
|
---|
| 285 |
|
---|
| 286 | return TranslationResult{
|
---|
| 287 | SourceLanguage: sourceLanguage,
|
---|
| 288 | TranslatedText: responseJSON.Content.Out,
|
---|
| 289 | }, nil
|
---|
| 290 | }
|
---|