source: code/trunk/morty.go@ 1

Last change on this file since 1 was 1, checked in by asciimoo, 9 years ago

[enh] initial commit

File size: 12.8 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17
18 "github.com/valyala/fasthttp"
19 "golang.org/x/net/html"
20 "golang.org/x/text/encoding/charmap"
21)
22
// Parser states used by sanitizeHTML to decide how a text token is emitted.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and are passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are markup and are passed through sanitizeHTML.
	STATE_IN_NOSCRIPT
)
28
29var CLIENT *fasthttp.Client = &fasthttp.Client{
30 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
31}
32
// CSS_URL_REGEXP matches CSS url(...) references.
//
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote. The URL character class excludes
// spaces, both quote characters and the closing parenthesis.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)")
34
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes together
// with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"), []byte("canvas"), []byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
42
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output without rewriting their value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"), []byte("accesskey"), []byte("align"), []byte("alt"),
	[]byte("autocomplete"), []byte("charset"), []byte("checked"), []byte("class"),
	[]byte("content"), []byte("contenteditable"), []byte("contextmenu"), []byte("dir"),
	[]byte("for"), []byte("height"), []byte("hidden"), []byte("id"),
	[]byte("lang"), []byte("media"), []byte("method"), []byte("name"),
	[]byte("nowrap"), []byte("placeholder"), []byte("property"), []byte("rel"),
	[]byte("spellcheck"), []byte("tabindex"), []byte("target"), []byte("title"),
	[]byte("translate"), []byte("type"), []byte("value"), []byte("width"),
}
77
// SELF_CLOSING_ELEMENTS lists tag names that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe element from this list does not
// start a skipped region.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"), []byte("base"), []byte("br"), []byte("col"), []byte("embed"),
	[]byte("hr"), []byte("img"), []byte("input"), []byte("keygen"), []byte("link"),
	[]byte("meta"), []byte("param"), []byte("source"), []byte("track"), []byte("wbr"),
}
95
type (
	// Proxy is the request handler state.
	Proxy struct {
		// Key is the HMAC key used to validate the "mortyhash" parameter;
		// when nil, hash validation is skipped.
		Key []byte
	}

	// RequestConfig carries the per-request data needed while rewriting a
	// fetched document. It is constructed positionally, so the field order
	// must stay Key, baseURL.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil disables signing.
		Key []byte
		// baseURL is the URL of the fetched document; relative references
		// are resolved against it.
		baseURL *url.URL
	}
)
104
var (
	// HTML_FORM_EXTENSION is a format string for the hidden input appended
	// to every <form>; its single %s verb receives the form's target URL.
	HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" />`

	// HTML_BODY_EXTENSION is a format string for the banner emitted just
	// before </body>; its single %s verb receives the original page URL.
	// Literal percent signs in the CSS are written as %%.
	HTML_BODY_EXTENSION = `
</div>
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>Proxified view,<br />visit <a href="%s">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 1000; }
#mortyheader a { color: #3498db; }
#mortyheader div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #3498db; background: #FFF; }
#mortyheader label { text-align: right; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
)
122
123func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
124 requestHash := popRequestParam(ctx, []byte("mortyhash"))
125
126 requestURI := popRequestParam(ctx, []byte("mortyurl"))
127
128 if requestURI == nil {
129 p.breakOnError(ctx, errors.New(`missing "mortyurl" URL parameter`))
130 return
131 }
132
133 if p.Key != nil {
134 if !verifyRequestURI(requestURI, requestHash, p.Key) {
135 p.breakOnError(ctx, errors.New("invalid hash"))
136 return
137 }
138 }
139
140 parsedURI, err := url.Parse(string(requestURI))
141
142 if p.breakOnError(ctx, err) {
143 return
144 }
145
146 req := fasthttp.AcquireRequest()
147 defer fasthttp.ReleaseRequest(req)
148
149 reqQuery := parsedURI.Query()
150 ctx.QueryArgs().VisitAll(func(key, value []byte) {
151 reqQuery.Add(string(key), string(value))
152 })
153
154 parsedURI.RawQuery = reqQuery.Encode()
155
156 uriStr := parsedURI.String()
157
158 log.Println("getting", uriStr)
159
160 req.SetRequestURI(uriStr)
161 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
162
163 resp := fasthttp.AcquireResponse()
164 defer fasthttp.ReleaseResponse(resp)
165
166 req.Header.SetMethodBytes(ctx.Method())
167 if ctx.IsPost() || ctx.IsPut() {
168 req.SetBody(ctx.PostBody())
169 }
170
171 if p.breakOnError(ctx, CLIENT.Do(req, resp)) {
172 return
173 }
174
175 if resp.StatusCode() != 200 {
176 switch resp.StatusCode() {
177 case 301, 302:
178 loc := resp.Header.Peek("Location")
179 if loc != nil {
180 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
181 if err == nil {
182 ctx.SetStatusCode(resp.StatusCode())
183 ctx.Response.Header.Add("Location", url)
184 log.Println("redirect to", string(loc))
185 return
186 }
187 }
188 }
189 log.Println("invalid request:", resp.StatusCode())
190 return
191 }
192
193 contentType := resp.Header.Peek("Content-Type")
194
195 if contentType == nil {
196 p.breakOnError(ctx, errors.New("invalid content type"))
197 return
198 }
199
200 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
201
202 var responseBody []byte
203
204 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
205 var err error
206 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
207 if p.breakOnError(ctx, err) {
208 return
209 }
210 } else {
211 responseBody = resp.Body()
212 }
213
214 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
215
216 switch {
217 case bytes.Contains(contentType, []byte("css")):
218 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
219 case bytes.Contains(contentType, []byte("html")):
220 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
221 default:
222 ctx.Write(responseBody)
223 }
224
225}
226
227func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
228 param := ctx.QueryArgs().PeekBytes(paramName)
229
230 if param == nil {
231 param = ctx.PostArgs().PeekBytes(paramName)
232 if param != nil {
233 ctx.PostArgs().DelBytes(paramName)
234 }
235 } else {
236 ctx.QueryArgs().DelBytes(paramName)
237 }
238
239 return param
240}
241
242func sanitizeCSS(rc *RequestConfig, ctx *fasthttp.RequestCtx, css []byte) {
243 // TODO
244
245 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
246
247 if urlSlices == nil {
248 ctx.Write(css)
249 return
250 }
251
252 startIndex := 0
253
254 for _, s := range urlSlices {
255 urlStart := s[4]
256 urlEnd := s[5]
257
258 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
259 ctx.Write(css[startIndex:urlStart])
260 ctx.Write([]byte(uri))
261 startIndex = urlEnd
262 } else {
263 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
264 }
265 }
266 if startIndex < len(css) {
267 ctx.Write(css[startIndex:len(css)])
268 }
269}
270
271func sanitizeHTML(rc *RequestConfig, ctx *fasthttp.RequestCtx, htmlDoc []byte) {
272 r := bytes.NewReader(htmlDoc)
273 decoder := html.NewTokenizer(r)
274 decoder.AllowCDATA(true)
275
276 unsafeElements := make([][]byte, 0, 8)
277 state := STATE_DEFAULT
278
279 for {
280 token := decoder.Next()
281 if token == html.ErrorToken {
282 err := decoder.Err()
283 if err != io.EOF {
284 log.Println("failed to parse HTML:")
285 }
286 break
287 }
288
289 if len(unsafeElements) == 0 {
290
291 switch token {
292 case html.StartTagToken, html.SelfClosingTagToken:
293 tag, hasAttrs := decoder.TagName()
294 safe := !inArray(tag, UNSAFE_ELEMENTS)
295 if !safe {
296 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
297 var unsafeTag []byte = make([]byte, len(tag))
298 copy(unsafeTag, tag)
299 unsafeElements = append(unsafeElements, unsafeTag)
300 }
301 break
302 }
303 if bytes.Equal(tag, []byte("noscript")) {
304 state = STATE_IN_NOSCRIPT
305 break
306 }
307 var attrs [][][]byte
308 fmt.Fprintf(ctx, "<%s", tag)
309 if hasAttrs {
310 for {
311 attrName, attrValue, moreAttr := decoder.TagAttr()
312 attrs = append(attrs, [][]byte{attrName, attrValue})
313 if !moreAttr {
314 break
315 }
316 }
317 if bytes.Equal(tag, []byte("meta")) {
318 sanitizeMetaAttrs(rc, ctx, attrs)
319 } else {
320 sanitizeAttrs(rc, ctx, attrs)
321 }
322 }
323 if token == html.SelfClosingTagToken {
324 fmt.Fprintf(ctx, " />")
325 } else {
326 fmt.Fprintf(ctx, ">")
327 if bytes.Equal(tag, []byte("style")) {
328 state = STATE_IN_STYLE
329 }
330 }
331 if bytes.Equal(tag, []byte("form")) {
332 var formURL *url.URL
333 for _, attr := range attrs {
334 if bytes.Equal(attr[0], []byte("action")) {
335 formURL, _ = url.Parse(string(attr[1]))
336 mergeURIs(rc.baseURL, formURL)
337 break
338 }
339 }
340 if formURL == nil {
341 formURL = rc.baseURL
342 }
343 fmt.Fprintf(ctx, HTML_FORM_EXTENSION, formURL.String())
344
345 }
346
347 case html.EndTagToken:
348 tag, _ := decoder.TagName()
349 writeEndTag := true
350 switch string(tag) {
351 case "body":
352 fmt.Fprintf(ctx, HTML_BODY_EXTENSION, rc.baseURL.String())
353 case "style":
354 state = STATE_DEFAULT
355 case "noscript":
356 state = STATE_DEFAULT
357 writeEndTag = false
358 }
359 // skip noscript tags - only the tag, not the content, because javascript is sanitized
360 if writeEndTag {
361 fmt.Fprintf(ctx, "</%s>", tag)
362 }
363
364 case html.TextToken:
365 switch state {
366 case STATE_DEFAULT:
367 fmt.Fprintf(ctx, "%s", decoder.Raw())
368 case STATE_IN_STYLE:
369 sanitizeCSS(rc, ctx, decoder.Raw())
370 case STATE_IN_NOSCRIPT:
371 sanitizeHTML(rc, ctx, decoder.Raw())
372 }
373
374 case html.DoctypeToken, html.CommentToken:
375 ctx.Write(decoder.Raw())
376 }
377 } else {
378 switch token {
379 case html.StartTagToken:
380 tag, _ := decoder.TagName()
381 if inArray(tag, UNSAFE_ELEMENTS) {
382 unsafeElements = append(unsafeElements, tag)
383 }
384
385 case html.EndTagToken:
386 tag, _ := decoder.TagName()
387 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
388 unsafeElements = unsafeElements[:len(unsafeElements)-1]
389 }
390 }
391 }
392 }
393}
394
395func sanitizeMetaAttrs(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrs [][][]byte) {
396 var http_equiv []byte
397 var content []byte
398
399 for _, attr := range attrs {
400 attrName := attr[0]
401 attrValue := attr[1]
402 if bytes.Equal(attrName, []byte("http-equiv")) {
403 http_equiv = bytes.ToLower(attrValue)
404 }
405 if bytes.Equal(attrName, []byte("content")) {
406 content = attrValue
407 }
408 }
409
410 if bytes.Equal(http_equiv, []byte("refresh")) && bytes.Index(content, []byte(";url=")) != -1 {
411 parts := bytes.SplitN(content, []byte(";url="), 2)
412 if uri, err := proxifyURI(rc, string(parts[1])); err == nil {
413 fmt.Fprintf(ctx, ` http-equiv="refresh" content="%s;%s"`, parts[0], uri)
414 }
415 } else {
416 sanitizeAttrs(rc, ctx, attrs)
417 }
418
419}
420
421func sanitizeAttrs(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrs [][][]byte) {
422 for _, attr := range attrs {
423 sanitizeAttr(rc, ctx, attr[0], attr[1])
424 }
425}
426
427func sanitizeAttr(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrName, attrValue []byte) {
428 if inArray(attrName, SAFE_ATTRIBUTES) {
429 fmt.Fprintf(ctx, " %s=\"%s\"", attrName, attrValue)
430 return
431 }
432 switch string(attrName) {
433 case "src", "href", "action":
434 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
435 fmt.Fprintf(ctx, " %s=\"%s\"", attrName, uri)
436 } else {
437 log.Println("cannot proxify uri:", attrValue)
438 }
439 case "style":
440 fmt.Fprintf(ctx, " %s=\"", attrName)
441 sanitizeCSS(rc, ctx, attrValue)
442 ctx.Write([]byte("\""))
443 }
444}
445
// mergeURIs completes the possibly relative reference u2 in place, using u1
// as the base URL.
//
// A missing scheme is taken from u1. When u2 has no host it also inherits
// u1's host, and its path is resolved against u1's path: an empty path keeps
// the base document path, a rooted path is kept, and a relative path is
// joined onto the directory of the base path. A nil argument is a no-op.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		// A reference without a path (e.g. "?q=1") addresses the base
		// document itself.
		u2.Path = u1.Path
	case u2.Path[0] != '/':
		// The base path may contain no slash at all (e.g. base
		// "http://example.com"); resolve against the root in that case
		// instead of slicing with a negative index.
		baseDir := "/"
		if i := strings.LastIndexByte(u1.Path, byte('/')); i > 0 {
			baseDir = u1.Path[:i]
		}
		merged := path.Join(baseDir, u2.Path)
		// path.Join cleans the result and thereby drops a trailing slash,
		// which is significant in URLs.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(merged, "/") {
			merged += "/"
		}
		u2.Path = merged
	}
}
457
458func proxifyURI(rc *RequestConfig, uri string) (string, error) {
459 // TODO check malicious data: - e.g. data:script
460 if strings.HasPrefix(uri, "data:") {
461 return uri, nil
462 }
463
464 if len(uri) > 0 && uri[0] == '#' {
465 return uri, nil
466 }
467
468 u, err := url.Parse(uri)
469 if err != nil {
470 return "", err
471 }
472 mergeURIs(rc.baseURL, u)
473
474 uri = u.String()
475
476 if rc.Key == nil {
477 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
478 }
479 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
480}
481
// inArray reports whether any element of a is byte-wise equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
490
// hash returns the HMAC-SHA256 of msg under key as a lowercase hexadecimal
// string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
496
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	// hmac.Equal compares without leaking timing information.
	return hmac.Equal(given, expected.Sum(nil))
}
508
509func (p *Proxy) breakOnError(ctx *fasthttp.RequestCtx, err error) bool {
510 if err == nil {
511 return false
512 }
513 ctx.SetStatusCode(404)
514 ctx.SetContentType("text/html")
515 ctx.Write([]byte(`<!doctype html>
516<head>
517<title>MortyError</title>
518</head>
519<body><h2>Error!</h2>`))
520 ctx.Write([]byte("<h3>"))
521 ctx.Write([]byte(html.EscapeString(err.Error())))
522 ctx.Write([]byte("</h3>"))
523 if p.Key == nil {
524 ctx.Write([]byte(`
525<form action="post">
526 Visit url: <input placeholder="https://url.." name="mortyurl" />
527 <input type="submit" value="go" />
528</form>`))
529 }
530 ctx.Write([]byte(`
531</body>
532</html>`))
533 return true
534}
535
536func main() {
537
538 listen := flag.String("listen", "127.0.0.1:3000", "Proxy listen address")
539 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
540 flag.Parse()
541
542 p := &Proxy{}
543
544 if *key != "" {
545 p.Key = []byte(*key)
546 }
547
548 log.Println("listening on", *listen)
549
550 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
551 log.Fatal("Error in ListenAndServe:", err)
552 }
553}
Note: See TracBrowser for help on using the repository browser.