source: code/trunk/morty.go@ 19

Last change on this file since 19 was 18, checked in by asciimoo, 9 years ago

[fix] avoid fasthttp panic on .onion urls - see https://github.com/valyala/fasthttp/issues/196

File size: 15.6 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
[4]17 "time"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Tokenizer states tracked by sanitizeHTML while walking a document.
const (
	// STATE_DEFAULT: text tokens are copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP matches CSS url(...) references.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]35
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output without rewriting their value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
79
// SELF_CLOSING_ELEMENTS lists the tags that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe element from this list does not
// open a skipped region.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
97
// Proxy holds the settings shared by all requests served by the handler.
type Proxy struct {
	// Key is the HMAC key used to validate "mortyhash"; nil disables validation.
	Key []byte
	// RequestTimeout bounds each upstream fetch.
	RequestTimeout time.Duration
}
102
// RequestConfig carries the per-request data the sanitizers need.
// The field order matters: callers build it with positional literals.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// baseURL is the URL of the document being sanitized; relative
	// references are resolved against it.
	baseURL *url.URL
}
107
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first verb receives the target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]109
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// writes just before a closing </body> tag. Its single %s verb receives the
// URL of the original page; literal percent signs in the CSS are written %%.
110var HTML_BODY_EXTENSION string = `
111<div id="mortyheader">
112 <input type="checkbox" id="mortytoggle" autocomplete="off" />
[5]113 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
[1]114</div>
115<style>
[5]116#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
[1]117#mortyheader a { color: #3498db; }
[5]118#mortyheader p { padding: 0; margin: 0; }
119#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
120#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
[1]121input[type=checkbox]#mortytoggle { display: none; }
122input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
123</style>
124`
125
126func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]127
128 if appRequestHandler(ctx) {
129 return
130 }
131
[1]132 requestHash := popRequestParam(ctx, []byte("mortyhash"))
133
134 requestURI := popRequestParam(ctx, []byte("mortyurl"))
135
136 if requestURI == nil {
[11]137 p.serveMainPage(ctx, nil)
[1]138 return
139 }
140
141 if p.Key != nil {
142 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[11]143 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
[1]144 return
145 }
146 }
147
148 parsedURI, err := url.Parse(string(requestURI))
149
[18]150 if strings.HasSuffix(parsedURI.Host, ".onion") {
151 p.serveMainPage(ctx, errors.New("Tor urls are not supported yet"))
152 return
153 }
154
[11]155 if err != nil {
156 p.serveMainPage(ctx, err)
[1]157 return
158 }
159
160 req := fasthttp.AcquireRequest()
161 defer fasthttp.ReleaseRequest(req)
[12]162 req.SetConnectionClose()
[1]163
164 reqQuery := parsedURI.Query()
165 ctx.QueryArgs().VisitAll(func(key, value []byte) {
166 reqQuery.Add(string(key), string(value))
167 })
168
169 parsedURI.RawQuery = reqQuery.Encode()
170
171 uriStr := parsedURI.String()
172
173 log.Println("getting", uriStr)
174
175 req.SetRequestURI(uriStr)
176 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
177
178 resp := fasthttp.AcquireResponse()
179 defer fasthttp.ReleaseResponse(resp)
180
181 req.Header.SetMethodBytes(ctx.Method())
182 if ctx.IsPost() || ctx.IsPut() {
183 req.SetBody(ctx.PostBody())
184 }
185
[11]186 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
187
188 if err != nil {
189 p.serveMainPage(ctx, err)
[1]190 return
191 }
192
193 if resp.StatusCode() != 200 {
194 switch resp.StatusCode() {
[7]195 case 301, 302, 303, 307, 308:
[1]196 loc := resp.Header.Peek("Location")
197 if loc != nil {
198 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
199 if err == nil {
200 ctx.SetStatusCode(resp.StatusCode())
201 ctx.Response.Header.Add("Location", url)
202 log.Println("redirect to", string(loc))
203 return
204 }
205 }
206 }
207 log.Println("invalid request:", resp.StatusCode())
208 return
209 }
210
211 contentType := resp.Header.Peek("Content-Type")
212
213 if contentType == nil {
[11]214 p.serveMainPage(ctx, errors.New("invalid content type"))
[1]215 return
216 }
217
[17]218 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
219 p.serveMainPage(ctx, errors.New("forbidden content type"))
220 return
221 }
222
[1]223 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
224
225 var responseBody []byte
226
227 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
228 var err error
229 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
[11]230 if err != nil {
231 p.serveMainPage(ctx, err)
[1]232 return
233 }
234 } else {
235 responseBody = resp.Body()
236 }
237
238 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
239
240 switch {
241 case bytes.Contains(contentType, []byte("css")):
242 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
243 case bytes.Contains(contentType, []byte("html")):
244 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
245 default:
246 ctx.Write(responseBody)
247 }
248}
249
[10]250func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]251 // serve robots.txt
[10]252 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
253 ctx.SetContentType("text/plain")
254 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
255 return true
256 }
[11]257
[10]258 return false
259}
260
[1]261func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
262 param := ctx.QueryArgs().PeekBytes(paramName)
263
264 if param == nil {
265 param = ctx.PostArgs().PeekBytes(paramName)
266 if param != nil {
267 ctx.PostArgs().DelBytes(paramName)
268 }
269 } else {
270 ctx.QueryArgs().DelBytes(paramName)
271 }
272
273 return param
274}
275
[9]276func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]277 // TODO
278
279 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
280
281 if urlSlices == nil {
[9]282 out.Write(css)
[1]283 return
284 }
285
286 startIndex := 0
287
288 for _, s := range urlSlices {
[15]289 urlStart := s[4]
290 urlEnd := s[5]
[1]291
292 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
[9]293 out.Write(css[startIndex:urlStart])
294 out.Write([]byte(uri))
[1]295 startIndex = urlEnd
296 } else {
297 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
298 }
299 }
300 if startIndex < len(css) {
[9]301 out.Write(css[startIndex:len(css)])
[1]302 }
303}
304
[9]305func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]306 r := bytes.NewReader(htmlDoc)
307 decoder := html.NewTokenizer(r)
308 decoder.AllowCDATA(true)
309
310 unsafeElements := make([][]byte, 0, 8)
311 state := STATE_DEFAULT
312
313 for {
314 token := decoder.Next()
315 if token == html.ErrorToken {
316 err := decoder.Err()
317 if err != io.EOF {
318 log.Println("failed to parse HTML:")
319 }
320 break
321 }
322
323 if len(unsafeElements) == 0 {
324
325 switch token {
326 case html.StartTagToken, html.SelfClosingTagToken:
327 tag, hasAttrs := decoder.TagName()
328 safe := !inArray(tag, UNSAFE_ELEMENTS)
329 if !safe {
330 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
331 var unsafeTag []byte = make([]byte, len(tag))
332 copy(unsafeTag, tag)
333 unsafeElements = append(unsafeElements, unsafeTag)
334 }
335 break
336 }
337 if bytes.Equal(tag, []byte("noscript")) {
338 state = STATE_IN_NOSCRIPT
339 break
340 }
341 var attrs [][][]byte
342 if hasAttrs {
343 for {
344 attrName, attrValue, moreAttr := decoder.TagAttr()
345 attrs = append(attrs, [][]byte{attrName, attrValue})
346 if !moreAttr {
347 break
348 }
349 }
[13]350 }
351 if bytes.Equal(tag, []byte("link")) {
352 sanitizeLinkTag(rc, out, attrs)
353 break
354 }
355
356 fmt.Fprintf(out, "<%s", tag)
357
358 if hasAttrs {
[1]359 if bytes.Equal(tag, []byte("meta")) {
[9]360 sanitizeMetaAttrs(rc, out, attrs)
[1]361 } else {
[9]362 sanitizeAttrs(rc, out, attrs)
[1]363 }
364 }
[13]365
[1]366 if token == html.SelfClosingTagToken {
[9]367 fmt.Fprintf(out, " />")
[1]368 } else {
[9]369 fmt.Fprintf(out, ">")
[1]370 if bytes.Equal(tag, []byte("style")) {
371 state = STATE_IN_STYLE
372 }
373 }
[13]374
[1]375 if bytes.Equal(tag, []byte("form")) {
376 var formURL *url.URL
377 for _, attr := range attrs {
378 if bytes.Equal(attr[0], []byte("action")) {
379 formURL, _ = url.Parse(string(attr[1]))
380 mergeURIs(rc.baseURL, formURL)
381 break
382 }
383 }
384 if formURL == nil {
385 formURL = rc.baseURL
386 }
[2]387 urlStr := formURL.String()
388 var key string
389 if rc.Key != nil {
390 key = hash(urlStr, rc.Key)
391 }
[9]392 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]393
394 }
395
396 case html.EndTagToken:
397 tag, _ := decoder.TagName()
398 writeEndTag := true
399 switch string(tag) {
400 case "body":
[9]401 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
[1]402 case "style":
403 state = STATE_DEFAULT
404 case "noscript":
405 state = STATE_DEFAULT
406 writeEndTag = false
407 }
408 // skip noscript tags - only the tag, not the content, because javascript is sanitized
409 if writeEndTag {
[9]410 fmt.Fprintf(out, "</%s>", tag)
[1]411 }
412
413 case html.TextToken:
414 switch state {
415 case STATE_DEFAULT:
[9]416 fmt.Fprintf(out, "%s", decoder.Raw())
[1]417 case STATE_IN_STYLE:
[9]418 sanitizeCSS(rc, out, decoder.Raw())
[1]419 case STATE_IN_NOSCRIPT:
[9]420 sanitizeHTML(rc, out, decoder.Raw())
[1]421 }
422
423 case html.DoctypeToken, html.CommentToken:
[9]424 out.Write(decoder.Raw())
[1]425 }
426 } else {
427 switch token {
428 case html.StartTagToken:
429 tag, _ := decoder.TagName()
430 if inArray(tag, UNSAFE_ELEMENTS) {
431 unsafeElements = append(unsafeElements, tag)
432 }
433
434 case html.EndTagToken:
435 tag, _ := decoder.TagName()
436 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
437 unsafeElements = unsafeElements[:len(unsafeElements)-1]
438 }
439 }
440 }
441 }
442}
443
[13]444func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
445 exclude := false
446 for _, attr := range attrs {
447 attrName := attr[0]
448 attrValue := attr[1]
449 if bytes.Equal(attrName, []byte("rel")) {
450 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
451 exclude = true
452 break
453 }
454 }
455 if bytes.Equal(attrName, []byte("as")) {
456 if bytes.Equal(attrValue, []byte("script")) {
457 exclude = true
458 break
459 }
460 }
461 }
462
463 if !exclude {
464 out.Write([]byte("<link"))
465 for _, attr := range attrs {
466 sanitizeAttr(rc, out, attr[0], attr[1])
467 }
468 out.Write([]byte(">"))
469 }
470}
471
[9]472func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]473 var http_equiv []byte
474 var content []byte
475
476 for _, attr := range attrs {
477 attrName := attr[0]
478 attrValue := attr[1]
479 if bytes.Equal(attrName, []byte("http-equiv")) {
480 http_equiv = bytes.ToLower(attrValue)
481 }
482 if bytes.Equal(attrName, []byte("content")) {
483 content = attrValue
484 }
485 }
486
[14]487 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
488 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
489 contentUrl := content[urlIndex+4:]
490 if uri, err := proxifyURI(rc, string(contentUrl)); err == nil {
491 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]492 }
493 } else {
[9]494 sanitizeAttrs(rc, out, attrs)
[1]495 }
496
497}
498
[9]499func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]500 for _, attr := range attrs {
[9]501 sanitizeAttr(rc, out, attr[0], attr[1])
[1]502 }
503}
504
[9]505func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
[1]506 if inArray(attrName, SAFE_ATTRIBUTES) {
[9]507 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
[1]508 return
509 }
510 switch string(attrName) {
511 case "src", "href", "action":
512 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
[9]513 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]514 } else {
515 log.Println("cannot proxify uri:", attrValue)
516 }
517 case "style":
[9]518 fmt.Fprintf(out, " %s=\"", attrName)
519 sanitizeCSS(rc, out, attrValue)
520 out.Write([]byte("\""))
[1]521 }
522}
523
// mergeURIs completes the reference u2 in place using the base URL u1.
//
//   - A missing scheme is taken from u1.
//   - A missing host is taken from u1; in that case a relative path is
//     resolved against the directory of u1's path, and an empty path becomes
//     u1's path.
//
// A base without a path is treated as "/". A trailing slash on the reference
// is preserved. Nil arguments are ignored.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}

	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		// Query-only or empty reference: it addresses the base document.
		u2.Path = u1.Path
		u2.RawPath = ""
	case u2.Path[0] != '/':
		// Everything up to and including the last slash of the base path is
		// the directory; an empty base path means the root.
		dir := "/"
		if i := strings.LastIndexByte(u1.Path, '/'); i >= 0 {
			dir = u1.Path[:i+1]
		}
		merged := path.Join(dir, u2.Path)
		// path.Join cleans away a trailing slash; put it back.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(merged, "/") {
			merged += "/"
		}
		u2.Path = merged
		u2.RawPath = ""
	}
}
535
536func proxifyURI(rc *RequestConfig, uri string) (string, error) {
537 // TODO check malicious data: - e.g. data:script
538 if strings.HasPrefix(uri, "data:") {
539 return uri, nil
540 }
541
542 if len(uri) > 0 && uri[0] == '#' {
543 return uri, nil
544 }
545
546 u, err := url.Parse(uri)
547 if err != nil {
548 return "", err
549 }
550 mergeURIs(rc.baseURL, u)
551
552 uri = u.String()
553
554 if rc.Key == nil {
555 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
556 }
557 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
558}
559
// inArray reports whether a contains an element byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
568
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
574
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	// hmac.Equal compares without leaking timing information.
	return hmac.Equal(given, expected)
}
586
[11]587func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
[1]588 ctx.SetContentType("text/html")
589 ctx.Write([]byte(`<!doctype html>
590<head>
[11]591<title>MortyProxy</title>
592<style>
593body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
594input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
595a { text-decoration: none; #2980b9; }
596h1, h2 { font-weight: 200; margin-bottom: 2rem; }
597h1 { font-size: 3em; }
598.footer { position: absolute; bottom: 2em; width: 100%; }
599.footer p { font-size: 0.8em; }
600
601</style>
[1]602</head>
[11]603<body>
604 <h1>MortyProxy</h1>`))
605 if err != nil {
606 ctx.SetStatusCode(404)
607 log.Println("error:", err)
608 ctx.Write([]byte("<h2>Error: "))
609 ctx.Write([]byte(html.EscapeString(err.Error())))
610 ctx.Write([]byte("</h2>"))
611 } else {
612 ctx.SetStatusCode(200)
613 }
[1]614 if p.Key == nil {
615 ctx.Write([]byte(`
616<form action="post">
617 Visit url: <input placeholder="https://url.." name="mortyurl" />
618 <input type="submit" value="go" />
619</form>`))
[11]620 } else {
621 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]622 }
623 ctx.Write([]byte(`
[11]624<div class="footer">
625 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
626 <a href="https://github.com/asciimoo/morty">view on github</a>
627 </p>
628</div>
[1]629</body>
630</html>`))
631}
632
633func main() {
634
[2]635 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]636 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[4]637 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]638 flag.Parse()
639
[4]640 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]641
642 if *key != "" {
643 p.Key = []byte(*key)
644 }
645
646 log.Println("listening on", *listen)
647
648 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
649 log.Fatal("Error in ListenAndServe:", err)
650 }
651}
Note: See TracBrowser for help on using the repository browser.