source: code/trunk/morty.go@ 25

Last change on this file since 25 was 25, checked in by alex, 9 years ago

Fix #15

File size: 15.8 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "regexp"
15 "strings"
[4]16 "time"
[1]17
18 "github.com/valyala/fasthttp"
19 "golang.org/x/net/html"
20 "golang.org/x/text/encoding/charmap"
21)
22
// Parser states used by sanitizeHTML to decide how the next text token is
// handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT
)
28
29var CLIENT *fasthttp.Client = &fasthttp.Client{
30 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
31}
32
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 capture the
// optional opening and closing quote, submatch 2 captures the URL itself (a
// run of tab or printable ASCII characters excluding space, quotes and ')').
// The closing parenthesis is optional.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]34
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes; tokens nested
// inside such an element are discarded as well.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
42
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their (HTML-escaped) value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
78
// SELF_CLOSING_ELEMENTS lists tag names that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe element from this list (e.g.
// "embed") is dropped without waiting for a closing tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
96
// Proxy holds the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" request parameter.
	// When nil, requests are not verified.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream request.
	RequestTimeout time.Duration
}
101
// RequestConfig carries the per-request data needed to rewrite URLs while a
// document is being sanitized.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// BaseURL is the URL of the document being sanitized; relative
	// references are resolved against it.
	BaseURL *url.URL
}
106
// HTML_FORM_EXTENSION is a format string for the hidden inputs appended after
// every <form> start tag. The first verb receives the form's target URL
// ("mortyurl"), the second its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]108
// HTML_BODY_EXTENSION is a format string for the banner (markup plus styles)
// that sanitizeHTML writes just before a closing </body> tag. Its single %s
// verb receives the URL of the original page; the literal percent sign in the
// CSS is written as %%.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]126
127 if appRequestHandler(ctx) {
128 return
129 }
130
[1]131 requestHash := popRequestParam(ctx, []byte("mortyhash"))
132
133 requestURI := popRequestParam(ctx, []byte("mortyurl"))
134
135 if requestURI == nil {
[11]136 p.serveMainPage(ctx, nil)
[1]137 return
138 }
139
140 if p.Key != nil {
141 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[11]142 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
[1]143 return
144 }
145 }
146
147 parsedURI, err := url.Parse(string(requestURI))
148
[18]149 if strings.HasSuffix(parsedURI.Host, ".onion") {
150 p.serveMainPage(ctx, errors.New("Tor urls are not supported yet"))
151 return
152 }
153
[11]154 if err != nil {
155 p.serveMainPage(ctx, err)
[1]156 return
157 }
158
159 req := fasthttp.AcquireRequest()
160 defer fasthttp.ReleaseRequest(req)
[12]161 req.SetConnectionClose()
[1]162
163 reqQuery := parsedURI.Query()
164 ctx.QueryArgs().VisitAll(func(key, value []byte) {
165 reqQuery.Add(string(key), string(value))
166 })
167
168 parsedURI.RawQuery = reqQuery.Encode()
169
170 uriStr := parsedURI.String()
171
172 log.Println("getting", uriStr)
173
174 req.SetRequestURI(uriStr)
175 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
176
177 resp := fasthttp.AcquireResponse()
178 defer fasthttp.ReleaseResponse(resp)
179
180 req.Header.SetMethodBytes(ctx.Method())
181 if ctx.IsPost() || ctx.IsPut() {
182 req.SetBody(ctx.PostBody())
183 }
184
[11]185 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
186
187 if err != nil {
188 p.serveMainPage(ctx, err)
[1]189 return
190 }
191
192 if resp.StatusCode() != 200 {
193 switch resp.StatusCode() {
[7]194 case 301, 302, 303, 307, 308:
[1]195 loc := resp.Header.Peek("Location")
196 if loc != nil {
[23]197 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
198 url, err := rc.ProxifyURI(string(loc))
[1]199 if err == nil {
200 ctx.SetStatusCode(resp.StatusCode())
201 ctx.Response.Header.Add("Location", url)
202 log.Println("redirect to", string(loc))
203 return
204 }
205 }
206 }
207 log.Println("invalid request:", resp.StatusCode())
208 return
209 }
210
211 contentType := resp.Header.Peek("Content-Type")
212
213 if contentType == nil {
[11]214 p.serveMainPage(ctx, errors.New("invalid content type"))
[1]215 return
216 }
217
[17]218 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
219 p.serveMainPage(ctx, errors.New("forbidden content type"))
220 return
221 }
222
[1]223 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
224
225 var responseBody []byte
226
227 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
228 var err error
229 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
[11]230 if err != nil {
231 p.serveMainPage(ctx, err)
[1]232 return
233 }
234 } else {
235 responseBody = resp.Body()
236 }
237
238 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
239
240 switch {
241 case bytes.Contains(contentType, []byte("css")):
[23]242 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]243 case bytes.Contains(contentType, []byte("html")):
[23]244 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]245 default:
246 ctx.Write(responseBody)
247 }
248}
249
[10]250func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]251 // serve robots.txt
[10]252 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
253 ctx.SetContentType("text/plain")
254 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
255 return true
256 }
[11]257
[10]258 return false
259}
260
[1]261func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
262 param := ctx.QueryArgs().PeekBytes(paramName)
263
264 if param == nil {
265 param = ctx.PostArgs().PeekBytes(paramName)
266 if param != nil {
267 ctx.PostArgs().DelBytes(paramName)
268 }
269 } else {
270 ctx.QueryArgs().DelBytes(paramName)
271 }
272
273 return param
274}
275
[9]276func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]277 // TODO
278
279 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
280
281 if urlSlices == nil {
[9]282 out.Write(css)
[1]283 return
284 }
285
286 startIndex := 0
287
288 for _, s := range urlSlices {
[15]289 urlStart := s[4]
290 urlEnd := s[5]
[1]291
[23]292 if uri, err := rc.ProxifyURI(string(css[urlStart:urlEnd])); err == nil {
[9]293 out.Write(css[startIndex:urlStart])
294 out.Write([]byte(uri))
[1]295 startIndex = urlEnd
296 } else {
297 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
298 }
299 }
300 if startIndex < len(css) {
[9]301 out.Write(css[startIndex:len(css)])
[1]302 }
303}
304
[9]305func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]306 r := bytes.NewReader(htmlDoc)
307 decoder := html.NewTokenizer(r)
308 decoder.AllowCDATA(true)
309
310 unsafeElements := make([][]byte, 0, 8)
311 state := STATE_DEFAULT
312
313 for {
314 token := decoder.Next()
315 if token == html.ErrorToken {
316 err := decoder.Err()
317 if err != io.EOF {
318 log.Println("failed to parse HTML:")
319 }
320 break
321 }
322
323 if len(unsafeElements) == 0 {
324
325 switch token {
326 case html.StartTagToken, html.SelfClosingTagToken:
327 tag, hasAttrs := decoder.TagName()
328 safe := !inArray(tag, UNSAFE_ELEMENTS)
329 if !safe {
330 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
331 var unsafeTag []byte = make([]byte, len(tag))
332 copy(unsafeTag, tag)
333 unsafeElements = append(unsafeElements, unsafeTag)
334 }
335 break
336 }
337 if bytes.Equal(tag, []byte("noscript")) {
338 state = STATE_IN_NOSCRIPT
339 break
340 }
341 var attrs [][][]byte
342 if hasAttrs {
343 for {
344 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]345 attrs = append(attrs, [][]byte{
346 attrName,
347 attrValue,
348 []byte(html.EscapeString(string(attrValue))),
349 })
[1]350 if !moreAttr {
351 break
352 }
353 }
[13]354 }
355 if bytes.Equal(tag, []byte("link")) {
356 sanitizeLinkTag(rc, out, attrs)
357 break
358 }
359
360 fmt.Fprintf(out, "<%s", tag)
361
362 if hasAttrs {
[1]363 if bytes.Equal(tag, []byte("meta")) {
[9]364 sanitizeMetaAttrs(rc, out, attrs)
[1]365 } else {
[9]366 sanitizeAttrs(rc, out, attrs)
[1]367 }
368 }
[13]369
[1]370 if token == html.SelfClosingTagToken {
[9]371 fmt.Fprintf(out, " />")
[1]372 } else {
[9]373 fmt.Fprintf(out, ">")
[1]374 if bytes.Equal(tag, []byte("style")) {
375 state = STATE_IN_STYLE
376 }
377 }
[13]378
[1]379 if bytes.Equal(tag, []byte("form")) {
380 var formURL *url.URL
381 for _, attr := range attrs {
382 if bytes.Equal(attr[0], []byte("action")) {
383 formURL, _ = url.Parse(string(attr[1]))
[25]384 formURL = mergeURIs(rc.BaseURL, formURL)
[1]385 break
386 }
387 }
388 if formURL == nil {
[23]389 formURL = rc.BaseURL
[1]390 }
[2]391 urlStr := formURL.String()
392 var key string
393 if rc.Key != nil {
394 key = hash(urlStr, rc.Key)
395 }
[9]396 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]397
398 }
399
400 case html.EndTagToken:
401 tag, _ := decoder.TagName()
402 writeEndTag := true
403 switch string(tag) {
404 case "body":
[23]405 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]406 case "style":
407 state = STATE_DEFAULT
408 case "noscript":
409 state = STATE_DEFAULT
410 writeEndTag = false
411 }
412 // skip noscript tags - only the tag, not the content, because javascript is sanitized
413 if writeEndTag {
[9]414 fmt.Fprintf(out, "</%s>", tag)
[1]415 }
416
417 case html.TextToken:
418 switch state {
419 case STATE_DEFAULT:
[9]420 fmt.Fprintf(out, "%s", decoder.Raw())
[1]421 case STATE_IN_STYLE:
[9]422 sanitizeCSS(rc, out, decoder.Raw())
[1]423 case STATE_IN_NOSCRIPT:
[9]424 sanitizeHTML(rc, out, decoder.Raw())
[1]425 }
426
427 case html.DoctypeToken, html.CommentToken:
[9]428 out.Write(decoder.Raw())
[1]429 }
430 } else {
431 switch token {
432 case html.StartTagToken:
433 tag, _ := decoder.TagName()
434 if inArray(tag, UNSAFE_ELEMENTS) {
435 unsafeElements = append(unsafeElements, tag)
436 }
437
438 case html.EndTagToken:
439 tag, _ := decoder.TagName()
440 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
441 unsafeElements = unsafeElements[:len(unsafeElements)-1]
442 }
443 }
444 }
445 }
446}
447
[13]448func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
449 exclude := false
450 for _, attr := range attrs {
451 attrName := attr[0]
452 attrValue := attr[1]
453 if bytes.Equal(attrName, []byte("rel")) {
454 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
455 exclude = true
456 break
457 }
458 }
459 if bytes.Equal(attrName, []byte("as")) {
460 if bytes.Equal(attrValue, []byte("script")) {
461 exclude = true
462 break
463 }
464 }
465 }
466
467 if !exclude {
468 out.Write([]byte("<link"))
469 for _, attr := range attrs {
[21]470 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]471 }
472 out.Write([]byte(">"))
473 }
474}
475
[9]476func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]477 var http_equiv []byte
478 var content []byte
479
480 for _, attr := range attrs {
481 attrName := attr[0]
482 attrValue := attr[1]
483 if bytes.Equal(attrName, []byte("http-equiv")) {
484 http_equiv = bytes.ToLower(attrValue)
485 }
486 if bytes.Equal(attrName, []byte("content")) {
487 content = attrValue
488 }
489 }
490
[14]491 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
492 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
493 contentUrl := content[urlIndex+4:]
[23]494 if uri, err := rc.ProxifyURI(string(contentUrl)); err == nil {
[14]495 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]496 }
497 } else {
[9]498 sanitizeAttrs(rc, out, attrs)
[1]499 }
500
501}
502
[9]503func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]504 for _, attr := range attrs {
[21]505 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]506 }
507}
508
[21]509func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]510 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]511 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]512 return
513 }
514 switch string(attrName) {
515 case "src", "href", "action":
[23]516 if uri, err := rc.ProxifyURI(string(attrValue)); err == nil {
[9]517 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]518 } else {
519 log.Println("cannot proxify uri:", attrValue)
520 }
521 case "style":
[21]522 cssAttr := bytes.NewBuffer(nil)
523 sanitizeCSS(rc, cssAttr, attrValue)
524 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]525 }
526}
527
// mergeURIs resolves the reference u2 against the base u1 using
// (*url.URL).ResolveReference and returns the result as a new URL.
//
// A nil argument no longer panics: if one of the URLs is nil a copy of the
// other is returned, and if both are nil the result is nil.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	switch {
	case u1 == nil && u2 == nil:
		return nil
	case u1 == nil:
		c := *u2
		return &c
	case u2 == nil:
		c := *u1
		return &c
	}
	return u1.ResolveReference(u2)
}
531
[23]532func (rc *RequestConfig) ProxifyURI(uri string) (string, error) {
[25]533 // remove javascript protocol
534 if strings.HasPrefix(uri, "javascript:") {
535 return "", nil
536 }
[1]537 // TODO check malicious data: - e.g. data:script
538 if strings.HasPrefix(uri, "data:") {
539 return uri, nil
540 }
541
542 if len(uri) > 0 && uri[0] == '#' {
543 return uri, nil
544 }
545
546 u, err := url.Parse(uri)
547 if err != nil {
548 return "", err
549 }
[25]550 u = mergeURIs(rc.BaseURL, u)
[1]551
552 uri = u.String()
553
554 if rc.Key == nil {
555 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
556 }
557 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
558}
559
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
568
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
574
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected. The
// digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	claimed := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(claimed, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(claimed, expected)
}
586
[11]587func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
[1]588 ctx.SetContentType("text/html")
589 ctx.Write([]byte(`<!doctype html>
590<head>
[11]591<title>MortyProxy</title>
592<style>
593body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
594input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
595a { text-decoration: none; #2980b9; }
596h1, h2 { font-weight: 200; margin-bottom: 2rem; }
597h1 { font-size: 3em; }
598.footer { position: absolute; bottom: 2em; width: 100%; }
599.footer p { font-size: 0.8em; }
600
601</style>
[1]602</head>
[11]603<body>
604 <h1>MortyProxy</h1>`))
605 if err != nil {
606 ctx.SetStatusCode(404)
607 log.Println("error:", err)
608 ctx.Write([]byte("<h2>Error: "))
609 ctx.Write([]byte(html.EscapeString(err.Error())))
610 ctx.Write([]byte("</h2>"))
611 } else {
612 ctx.SetStatusCode(200)
613 }
[1]614 if p.Key == nil {
615 ctx.Write([]byte(`
616<form action="post">
617 Visit url: <input placeholder="https://url.." name="mortyurl" />
618 <input type="submit" value="go" />
619</form>`))
[11]620 } else {
621 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]622 }
623 ctx.Write([]byte(`
[11]624<div class="footer">
625 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
626 <a href="https://github.com/asciimoo/morty">view on github</a>
627 </p>
628</div>
[1]629</body>
630</html>`))
631}
632
633func main() {
634
[2]635 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]636 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[24]637 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[4]638 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]639 flag.Parse()
640
[24]641 if *ipv6 {
642 CLIENT.Dial = fasthttp.DialDualStack
643 }
644
[4]645 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]646
647 if *key != "" {
648 p.Key = []byte(*key)
649 }
650
651 log.Println("listening on", *listen)
652
653 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
654 log.Fatal("Error in ListenAndServe:", err)
655 }
656}
Note: See TracBrowser for help on using the repository browser.