source: code/trunk/morty.go@ 40

Last change on this file since 40 was 40, checked in by alex, 9 years ago

[fix] fix infinite loop with <base target=".." />

File size: 17.6 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "regexp"
15 "strings"
16 "time"
17
18 "github.com/valyala/fasthttp"
19 "golang.org/x/net/html"
20 "golang.org/x/text/encoding/charmap"
21)
22
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are re-parsed with sanitizeHTML.
	STATE_IN_NOSCRIPT
)
28
29var CLIENT *fasthttp.Client = &fasthttp.Client{
30 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
31}
32
// CSS_URL_REGEXP finds `url(...)` references in CSS. Submatch 1 is the
// optional opening quote, submatch 2 the URL itself and submatch 3 the
// optional closing quote; the closing parenthesis is optional.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*" +
		"([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)" +
		"(['\"]?)\\)?",
)
34
// UNSAFE_ELEMENTS holds the tag names that sanitizeHTML removes from the
// output together with everything nested inside them.
var UNSAFE_ELEMENTS = func() [][]byte {
	names := []string{
		"applet",
		"canvas",
		"embed",
		//"iframe",
		"script",
	}
	list := make([][]byte, 0, len(names))
	for _, name := range names {
		list = append(list, []byte(name))
	}
	return list
}()
42
// SAFE_ATTRIBUTES holds the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value and no further rewriting.
var SAFE_ATTRIBUTES = func() [][]byte {
	names := []string{
		"abbr",
		"accesskey",
		"align",
		"alt",
		"as",
		"autocomplete",
		"charset",
		"checked",
		"class",
		"content",
		"contenteditable",
		"contextmenu",
		"dir",
		"for",
		"height",
		"hidden",
		"id",
		"lang",
		"media",
		"method",
		"name",
		"nowrap",
		"placeholder",
		"property",
		"rel",
		"spellcheck",
		"tabindex",
		"target",
		"title",
		"translate",
		"type",
		"value",
		"width",
	}
	list := make([][]byte, 0, len(names))
	for _, name := range names {
		list = append(list, []byte(name))
	}
	return list
}()
78
// SELF_CLOSING_ELEMENTS holds the names of elements that never have a closing
// tag. sanitizeHTML consults it so that an unsafe element from this list is
// not tracked as "open".
var SELF_CLOSING_ELEMENTS = func() [][]byte {
	names := []string{
		"area",
		"base",
		"br",
		"col",
		"embed",
		"hr",
		"img",
		"input",
		"keygen",
		"link",
		"meta",
		"param",
		"source",
		"track",
		"wbr",
	}
	list := make([][]byte, 0, len(names))
	for _, name := range names {
		list = append(list, []byte(name))
	}
	return list
}()
96
// Proxy carries the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" parameter.
	// When nil, request URLs are not verified.
	Key []byte

	// RequestTimeout bounds each upstream request (passed to DoTimeout).
	RequestTimeout time.Duration
}
101
// RequestConfig is the per-document context handed to the sanitizers.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil disables signing.
	Key []byte

	// BaseURL is the URL against which relative references are resolved.
	// sanitizeHTML may replace it when the document contains <base href>.
	BaseURL *url.URL
}
106
// HTML_FORM_EXTENSION is the format string for the two hidden inputs that
// sanitizeHTML appends after every opening <form> tag: the target URL
// ("mortyurl") and its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" />` +
	`<input type="hidden" name="mortyhash" value="%s" />`
108
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// writes just before a closing </body> tag. Its single %s verb receives the
// URL of the original page; literal percent signs are doubled (%%).
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; padding: 12px 12px 12px 0; margin: 0; box-sizing: content-box; top: 15%%; left: 0; max-width: 140px; color: #444; overflow: hidden; z-index: 110000; font-size: 12px; line-height: normal; }
#mortyheader a { color: #3498db; font-weight: bold; }
#mortyheader p { padding: 0 0 0.7em 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 12px !important; font-family: sans !important; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
126
127 if appRequestHandler(ctx) {
128 return
129 }
130
131 requestHash := popRequestParam(ctx, []byte("mortyhash"))
132
133 requestURI := popRequestParam(ctx, []byte("mortyurl"))
134
135 if requestURI == nil {
136 p.serveMainPage(ctx, 200, nil)
137 return
138 }
139
140 if p.Key != nil {
141 if !verifyRequestURI(requestURI, requestHash, p.Key) {
142 // HTTP status code 403 : Forbidden
143 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
144 return
145 }
146 }
147
148 parsedURI, err := url.Parse(string(requestURI))
149
150 if strings.HasSuffix(parsedURI.Host, ".onion") {
151 // HTTP status code 501 : Not Implemented
152 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
153 return
154 }
155
156 if err != nil {
157 // HTTP status code 500 : Internal Server Error
158 p.serveMainPage(ctx, 500, err)
159 return
160 }
161
162 req := fasthttp.AcquireRequest()
163 defer fasthttp.ReleaseRequest(req)
164 req.SetConnectionClose()
165
166 reqQuery := parsedURI.Query()
167 ctx.QueryArgs().VisitAll(func(key, value []byte) {
168 reqQuery.Add(string(key), string(value))
169 })
170
171 parsedURI.RawQuery = reqQuery.Encode()
172
173 uriStr := parsedURI.String()
174
175 log.Println("getting", uriStr)
176
177 req.SetRequestURI(uriStr)
178 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
179
180 resp := fasthttp.AcquireResponse()
181 defer fasthttp.ReleaseResponse(resp)
182
183 req.Header.SetMethodBytes(ctx.Method())
184 if ctx.IsPost() || ctx.IsPut() {
185 req.SetBody(ctx.PostBody())
186 }
187
188 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
189
190 if err != nil {
191 if err == fasthttp.ErrTimeout {
192 // HTTP status code 504 : Gateway Time-Out
193 p.serveMainPage(ctx, 504, err)
194 } else {
195 // HTTP status code 500 : Internal Server Error
196 p.serveMainPage(ctx, 500, err)
197 }
198 return
199 }
200
201 if resp.StatusCode() != 200 {
202 switch resp.StatusCode() {
203 case 301, 302, 303, 307, 308:
204 loc := resp.Header.Peek("Location")
205 if loc != nil {
206 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
207 url, err := rc.ProxifyURI(string(loc))
208 if err == nil {
209 ctx.SetStatusCode(resp.StatusCode())
210 ctx.Response.Header.Add("Location", url)
211 log.Println("redirect to", string(loc))
212 return
213 }
214 }
215 }
216 error_message := fmt.Sprintf("invalid response: %d", resp.StatusCode())
217 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
218 return
219 }
220
221 contentType := resp.Header.Peek("Content-Type")
222
223 if contentType == nil {
224 // HTTP status code 503 : Service Unavailable
225 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
226 return
227 }
228
229 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
230 // HTTP status code 403 : Forbidden
231 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
232 return
233 }
234
235 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
236
237 var responseBody []byte
238
239 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
240 var err error
241 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
242 if err != nil {
243 // HTTP status code 503 : Service Unavailable
244 p.serveMainPage(ctx, 503, err)
245 return
246 }
247 } else {
248 responseBody = resp.Body()
249 }
250
251 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
252
253 switch {
254 case bytes.Contains(contentType, []byte("css")):
255 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
256 case bytes.Contains(contentType, []byte("html")):
257 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
258 default:
259 if ctx.Request.Header.Peek("Content-Disposition") != nil {
260 ctx.Response.Header.AddBytesV("Content-Disposition", ctx.Request.Header.Peek("Content-Disposition"))
261 }
262 ctx.Write(responseBody)
263 }
264}
265
266func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
267 // serve robots.txt
268 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
269 ctx.SetContentType("text/plain")
270 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
271 return true
272 }
273
274 return false
275}
276
277func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
278 param := ctx.QueryArgs().PeekBytes(paramName)
279
280 if param == nil {
281 param = ctx.PostArgs().PeekBytes(paramName)
282 if param != nil {
283 ctx.PostArgs().DelBytes(paramName)
284 }
285 } else {
286 ctx.QueryArgs().DelBytes(paramName)
287 }
288
289 return param
290}
291
292func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
293 // TODO
294
295 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
296
297 if urlSlices == nil {
298 out.Write(css)
299 return
300 }
301
302 startIndex := 0
303
304 for _, s := range urlSlices {
305 urlStart := s[4]
306 urlEnd := s[5]
307
308 if uri, err := rc.ProxifyURI(string(css[urlStart:urlEnd])); err == nil {
309 out.Write(css[startIndex:urlStart])
310 out.Write([]byte(uri))
311 startIndex = urlEnd
312 } else {
313 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
314 }
315 }
316 if startIndex < len(css) {
317 out.Write(css[startIndex:len(css)])
318 }
319}
320
321func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
322 r := bytes.NewReader(htmlDoc)
323 decoder := html.NewTokenizer(r)
324 decoder.AllowCDATA(true)
325
326 unsafeElements := make([][]byte, 0, 8)
327 state := STATE_DEFAULT
328
329 for {
330 token := decoder.Next()
331 if token == html.ErrorToken {
332 err := decoder.Err()
333 if err != io.EOF {
334 log.Println("failed to parse HTML:")
335 }
336 break
337 }
338
339 if len(unsafeElements) == 0 {
340
341 switch token {
342 case html.StartTagToken, html.SelfClosingTagToken:
343 tag, hasAttrs := decoder.TagName()
344 safe := !inArray(tag, UNSAFE_ELEMENTS)
345 if !safe {
346 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
347 var unsafeTag []byte = make([]byte, len(tag))
348 copy(unsafeTag, tag)
349 unsafeElements = append(unsafeElements, unsafeTag)
350 }
351 break
352 }
353 if bytes.Equal(tag, []byte("base")) {
354 for {
355 attrName, attrValue, moreAttr := decoder.TagAttr()
356 if bytes.Equal(attrName, []byte("href")) {
357 parsedURI, err := url.Parse(string(attrValue))
358 if err == nil {
359 rc.BaseURL = parsedURI
360 }
361 }
362 if !moreAttr {
363 break
364 }
365 }
366 break
367 }
368 if bytes.Equal(tag, []byte("noscript")) {
369 state = STATE_IN_NOSCRIPT
370 break
371 }
372 var attrs [][][]byte
373 if hasAttrs {
374 for {
375 attrName, attrValue, moreAttr := decoder.TagAttr()
376 attrs = append(attrs, [][]byte{
377 attrName,
378 attrValue,
379 []byte(html.EscapeString(string(attrValue))),
380 })
381 if !moreAttr {
382 break
383 }
384 }
385 }
386 if bytes.Equal(tag, []byte("link")) {
387 sanitizeLinkTag(rc, out, attrs)
388 break
389 }
390
391 fmt.Fprintf(out, "<%s", tag)
392
393 if hasAttrs {
394 if bytes.Equal(tag, []byte("meta")) {
395 sanitizeMetaAttrs(rc, out, attrs)
396 } else {
397 sanitizeAttrs(rc, out, attrs)
398 }
399 }
400
401 if token == html.SelfClosingTagToken {
402 fmt.Fprintf(out, " />")
403 } else {
404 fmt.Fprintf(out, ">")
405 if bytes.Equal(tag, []byte("style")) {
406 state = STATE_IN_STYLE
407 }
408 }
409
410 if bytes.Equal(tag, []byte("form")) {
411 var formURL *url.URL
412 for _, attr := range attrs {
413 if bytes.Equal(attr[0], []byte("action")) {
414 formURL, _ = url.Parse(string(attr[1]))
415 formURL = mergeURIs(rc.BaseURL, formURL)
416 break
417 }
418 }
419 if formURL == nil {
420 formURL = rc.BaseURL
421 }
422 urlStr := formURL.String()
423 var key string
424 if rc.Key != nil {
425 key = hash(urlStr, rc.Key)
426 }
427 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
428
429 }
430
431 case html.EndTagToken:
432 tag, _ := decoder.TagName()
433 writeEndTag := true
434 switch string(tag) {
435 case "body":
436 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
437 case "style":
438 state = STATE_DEFAULT
439 case "noscript":
440 state = STATE_DEFAULT
441 writeEndTag = false
442 }
443 // skip noscript tags - only the tag, not the content, because javascript is sanitized
444 if writeEndTag {
445 fmt.Fprintf(out, "</%s>", tag)
446 }
447
448 case html.TextToken:
449 switch state {
450 case STATE_DEFAULT:
451 fmt.Fprintf(out, "%s", decoder.Raw())
452 case STATE_IN_STYLE:
453 sanitizeCSS(rc, out, decoder.Raw())
454 case STATE_IN_NOSCRIPT:
455 sanitizeHTML(rc, out, decoder.Raw())
456 }
457
458 case html.DoctypeToken, html.CommentToken:
459 out.Write(decoder.Raw())
460 }
461 } else {
462 switch token {
463 case html.StartTagToken:
464 tag, _ := decoder.TagName()
465 if inArray(tag, UNSAFE_ELEMENTS) {
466 unsafeElements = append(unsafeElements, tag)
467 }
468
469 case html.EndTagToken:
470 tag, _ := decoder.TagName()
471 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
472 unsafeElements = unsafeElements[:len(unsafeElements)-1]
473 }
474 }
475 }
476 }
477}
478
479func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
480 exclude := false
481 for _, attr := range attrs {
482 attrName := attr[0]
483 attrValue := attr[1]
484 if bytes.Equal(attrName, []byte("rel")) {
485 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
486 exclude = true
487 break
488 }
489 }
490 if bytes.Equal(attrName, []byte("as")) {
491 if bytes.Equal(attrValue, []byte("script")) {
492 exclude = true
493 break
494 }
495 }
496 }
497
498 if !exclude {
499 out.Write([]byte("<link"))
500 for _, attr := range attrs {
501 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
502 }
503 out.Write([]byte(">"))
504 }
505}
506
507func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
508 var http_equiv []byte
509 var content []byte
510
511 for _, attr := range attrs {
512 attrName := attr[0]
513 attrValue := attr[1]
514 if bytes.Equal(attrName, []byte("http-equiv")) {
515 http_equiv = bytes.ToLower(attrValue)
516 }
517 if bytes.Equal(attrName, []byte("content")) {
518 content = attrValue
519 }
520 }
521
522 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
523 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
524 contentUrl := content[urlIndex+4:]
525 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
526 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
527 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
528 contentUrl = contentUrl[1 : len(contentUrl)-1]
529 }
530 }
531 // output proxify result
532 if uri, err := rc.ProxifyURI(string(contentUrl)); err == nil {
533 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
534 }
535 } else {
536 sanitizeAttrs(rc, out, attrs)
537 }
538
539}
540
541func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
542 for _, attr := range attrs {
543 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
544 }
545}
546
547func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
548 if inArray(attrName, SAFE_ATTRIBUTES) {
549 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
550 return
551 }
552 switch string(attrName) {
553 case "src", "href", "action":
554 if uri, err := rc.ProxifyURI(string(attrValue)); err == nil {
555 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
556 } else {
557 log.Println("cannot proxify uri:", string(attrValue))
558 }
559 case "style":
560 cssAttr := bytes.NewBuffer(nil)
561 sanitizeCSS(rc, cssAttr, attrValue)
562 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
563 }
564}
565
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
569
570func (rc *RequestConfig) ProxifyURI(uri string) (string, error) {
571 // remove javascript protocol
572 if strings.HasPrefix(uri, "javascript:") {
573 return "", nil
574 }
575 // TODO check malicious data: - e.g. data:script
576 if strings.HasPrefix(uri, "data:") {
577 return uri, nil
578 }
579
580 if len(uri) > 0 && uri[0] == '#' {
581 return uri, nil
582 }
583
584 u, err := url.Parse(uri)
585 if err != nil {
586 return "", err
587 }
588 u = mergeURIs(rc.BaseURL, u)
589
590 uri = u.String()
591
592 if rc.Key == nil {
593 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
594 }
595 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
596}
597
// inArray reports whether a contains an element byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
606
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)
	return hex.EncodeToString(digest)
}
612
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The comparison is done with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	supplied := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(supplied, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	mac := hmac.New(sha256.New, key)
	mac.Write(uri)
	expected := mac.Sum(nil)
	return hmac.Equal(supplied, expected)
}
624
625func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
626 ctx.SetContentType("text/html")
627 ctx.SetStatusCode(statusCode)
628 ctx.Write([]byte(`<!doctype html>
629<head>
630<title>MortyProxy</title>
631<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
632<style>
633html { height: 100%; }
634body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
635input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
636input[placeholder] { width:80%; }
637a { text-decoration: none; #2980b9; }
638h1, h2 { font-weight: 200; margin-bottom: 2rem; }
639h1 { font-size: 3em; }
640.container { flex:1; min-height: 100%; margin-bottom: 1em; }
641.footer { margin: 1em; }
642.footer p { font-size: 0.8em; }
643</style>
644</head>
645<body>
646 <div class="container">
647 <h1>MortyProxy</h1>
648`))
649 if err != nil {
650 log.Println("error:", err)
651 ctx.Write([]byte("<h2>Error: "))
652 ctx.Write([]byte(html.EscapeString(err.Error())))
653 ctx.Write([]byte("</h2>"))
654 }
655 if p.Key == nil {
656 ctx.Write([]byte(`
657 <form action="post">
658 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
659 <input type="submit" value="go" />
660 </form>`))
661 } else {
662 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
663 }
664 ctx.Write([]byte(`
665 </div>
666 <div class="footer">
667 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
668 <a href="https://github.com/asciimoo/morty">view on github</a>
669 </p>
670 </div>
671</body>
672</html>`))
673}
674
675func main() {
676
677 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
678 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
679 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
680 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
681 flag.Parse()
682
683 if *ipv6 {
684 CLIENT.Dial = fasthttp.DialDualStack
685 }
686
687 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
688
689 if *key != "" {
690 p.Key = []byte(*key)
691 }
692
693 log.Println("listening on", *listen)
694
695 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
696 log.Fatal("Error in ListenAndServe:", err)
697 }
698}
Note: See TracBrowser for help on using the repository browser.