source: code/trunk/morty.go@ 14

Last change on this file since 14 was 14, checked in by asciimoo, 9 years ago

[fix] case insensitive http-equiv refresh redirect handling - fixes #5

File size: 15.3 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
[4]17 "time"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Tokenizer states tracked by sanitizeHTML while walking a document.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE int = iota
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT int = iota
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP matches `url(...)` and `@import ...` references in CSS.
// Submatch 3 (indices 6 and 7 of a FindAllSubmatchIndex result) is the bare
// URI without the optional surrounding quotes.
var CSS_URL_REGEXP = regexp.MustCompile(
	"(url\\(|@import +)(['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
[1]35
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes from the
// output together with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr passes through
// without rewriting their value. Attributes not listed here are either
// rewritten (src, href, action, style) or dropped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
79
// SELF_CLOSING_ELEMENTS lists tag names that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe element from this list is not
// pushed onto the stack of open unsafe elements.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
97
// Proxy holds the server-wide settings shared by all requests.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; nil disables the check.
	Key []byte
	// RequestTimeout is the deadline passed to the upstream client call.
	RequestTimeout time.Duration
}
102
// RequestConfig carries the per-request data the sanitizers need.
// Field order matters: callers build it with an unkeyed literal.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// baseURL is the URL of the document being sanitized; relative
	// references are resolved against it.
	baseURL *url.URL
}
107
// HTML_FORM_EXTENSION is the fmt template for the two hidden inputs appended
// to every rewritten <form>: the target URL and its hash, in that order.
var HTML_FORM_EXTENSION = "<input type=\"hidden\" name=\"mortyurl\" value=\"%s\" />" +
	"<input type=\"hidden\" name=\"mortyhash\" value=\"%s\" />"
[1]109
// HTML_BODY_EXTENSION is the fmt template written just before </body> of
// every sanitized page. Its single %s verb receives the original page URL;
// literal percent signs in the CSS are doubled for that reason.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
125
126func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]127
128 if appRequestHandler(ctx) {
129 return
130 }
131
[1]132 requestHash := popRequestParam(ctx, []byte("mortyhash"))
133
134 requestURI := popRequestParam(ctx, []byte("mortyurl"))
135
136 if requestURI == nil {
[11]137 p.serveMainPage(ctx, nil)
[1]138 return
139 }
140
141 if p.Key != nil {
142 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[11]143 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
[1]144 return
145 }
146 }
147
148 parsedURI, err := url.Parse(string(requestURI))
149
[11]150 if err != nil {
151 p.serveMainPage(ctx, err)
[1]152 return
153 }
154
155 req := fasthttp.AcquireRequest()
156 defer fasthttp.ReleaseRequest(req)
[12]157 req.SetConnectionClose()
[1]158
159 reqQuery := parsedURI.Query()
160 ctx.QueryArgs().VisitAll(func(key, value []byte) {
161 reqQuery.Add(string(key), string(value))
162 })
163
164 parsedURI.RawQuery = reqQuery.Encode()
165
166 uriStr := parsedURI.String()
167
168 log.Println("getting", uriStr)
169
170 req.SetRequestURI(uriStr)
171 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
172
173 resp := fasthttp.AcquireResponse()
174 defer fasthttp.ReleaseResponse(resp)
175
176 req.Header.SetMethodBytes(ctx.Method())
177 if ctx.IsPost() || ctx.IsPut() {
178 req.SetBody(ctx.PostBody())
179 }
180
[11]181 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
182
183 if err != nil {
184 p.serveMainPage(ctx, err)
[1]185 return
186 }
187
188 if resp.StatusCode() != 200 {
189 switch resp.StatusCode() {
[7]190 case 301, 302, 303, 307, 308:
[1]191 loc := resp.Header.Peek("Location")
192 if loc != nil {
193 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
194 if err == nil {
195 ctx.SetStatusCode(resp.StatusCode())
196 ctx.Response.Header.Add("Location", url)
197 log.Println("redirect to", string(loc))
198 return
199 }
200 }
201 }
202 log.Println("invalid request:", resp.StatusCode())
203 return
204 }
205
206 contentType := resp.Header.Peek("Content-Type")
207
208 if contentType == nil {
[11]209 p.serveMainPage(ctx, errors.New("invalid content type"))
[1]210 return
211 }
212
213 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
214
215 var responseBody []byte
216
217 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
218 var err error
219 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
[11]220 if err != nil {
221 p.serveMainPage(ctx, err)
[1]222 return
223 }
224 } else {
225 responseBody = resp.Body()
226 }
227
228 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
229
230 switch {
231 case bytes.Contains(contentType, []byte("css")):
232 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
233 case bytes.Contains(contentType, []byte("html")):
234 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
235 default:
236 ctx.Write(responseBody)
237 }
238}
239
[10]240func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]241 // serve robots.txt
[10]242 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
243 ctx.SetContentType("text/plain")
244 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
245 return true
246 }
[11]247
[10]248 return false
249}
250
[1]251func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
252 param := ctx.QueryArgs().PeekBytes(paramName)
253
254 if param == nil {
255 param = ctx.PostArgs().PeekBytes(paramName)
256 if param != nil {
257 ctx.PostArgs().DelBytes(paramName)
258 }
259 } else {
260 ctx.QueryArgs().DelBytes(paramName)
261 }
262
263 return param
264}
265
[9]266func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]267 // TODO
268
269 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
270
271 if urlSlices == nil {
[9]272 out.Write(css)
[1]273 return
274 }
275
276 startIndex := 0
277
278 for _, s := range urlSlices {
[8]279 urlStart := s[6]
280 urlEnd := s[7]
[1]281
282 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
[9]283 out.Write(css[startIndex:urlStart])
284 out.Write([]byte(uri))
[1]285 startIndex = urlEnd
286 } else {
287 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
288 }
289 }
290 if startIndex < len(css) {
[9]291 out.Write(css[startIndex:len(css)])
[1]292 }
293}
294
[9]295func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]296 r := bytes.NewReader(htmlDoc)
297 decoder := html.NewTokenizer(r)
298 decoder.AllowCDATA(true)
299
300 unsafeElements := make([][]byte, 0, 8)
301 state := STATE_DEFAULT
302
303 for {
304 token := decoder.Next()
305 if token == html.ErrorToken {
306 err := decoder.Err()
307 if err != io.EOF {
308 log.Println("failed to parse HTML:")
309 }
310 break
311 }
312
313 if len(unsafeElements) == 0 {
314
315 switch token {
316 case html.StartTagToken, html.SelfClosingTagToken:
317 tag, hasAttrs := decoder.TagName()
318 safe := !inArray(tag, UNSAFE_ELEMENTS)
319 if !safe {
320 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
321 var unsafeTag []byte = make([]byte, len(tag))
322 copy(unsafeTag, tag)
323 unsafeElements = append(unsafeElements, unsafeTag)
324 }
325 break
326 }
327 if bytes.Equal(tag, []byte("noscript")) {
328 state = STATE_IN_NOSCRIPT
329 break
330 }
331 var attrs [][][]byte
332 if hasAttrs {
333 for {
334 attrName, attrValue, moreAttr := decoder.TagAttr()
335 attrs = append(attrs, [][]byte{attrName, attrValue})
336 if !moreAttr {
337 break
338 }
339 }
[13]340 }
341 if bytes.Equal(tag, []byte("link")) {
342 sanitizeLinkTag(rc, out, attrs)
343 break
344 }
345
346 fmt.Fprintf(out, "<%s", tag)
347
348 if hasAttrs {
[1]349 if bytes.Equal(tag, []byte("meta")) {
[9]350 sanitizeMetaAttrs(rc, out, attrs)
[1]351 } else {
[9]352 sanitizeAttrs(rc, out, attrs)
[1]353 }
354 }
[13]355
[1]356 if token == html.SelfClosingTagToken {
[9]357 fmt.Fprintf(out, " />")
[1]358 } else {
[9]359 fmt.Fprintf(out, ">")
[1]360 if bytes.Equal(tag, []byte("style")) {
361 state = STATE_IN_STYLE
362 }
363 }
[13]364
[1]365 if bytes.Equal(tag, []byte("form")) {
366 var formURL *url.URL
367 for _, attr := range attrs {
368 if bytes.Equal(attr[0], []byte("action")) {
369 formURL, _ = url.Parse(string(attr[1]))
370 mergeURIs(rc.baseURL, formURL)
371 break
372 }
373 }
374 if formURL == nil {
375 formURL = rc.baseURL
376 }
[2]377 urlStr := formURL.String()
378 var key string
379 if rc.Key != nil {
380 key = hash(urlStr, rc.Key)
381 }
[9]382 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]383
384 }
385
386 case html.EndTagToken:
387 tag, _ := decoder.TagName()
388 writeEndTag := true
389 switch string(tag) {
390 case "body":
[9]391 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
[1]392 case "style":
393 state = STATE_DEFAULT
394 case "noscript":
395 state = STATE_DEFAULT
396 writeEndTag = false
397 }
398 // skip noscript tags - only the tag, not the content, because javascript is sanitized
399 if writeEndTag {
[9]400 fmt.Fprintf(out, "</%s>", tag)
[1]401 }
402
403 case html.TextToken:
404 switch state {
405 case STATE_DEFAULT:
[9]406 fmt.Fprintf(out, "%s", decoder.Raw())
[1]407 case STATE_IN_STYLE:
[9]408 sanitizeCSS(rc, out, decoder.Raw())
[1]409 case STATE_IN_NOSCRIPT:
[9]410 sanitizeHTML(rc, out, decoder.Raw())
[1]411 }
412
413 case html.DoctypeToken, html.CommentToken:
[9]414 out.Write(decoder.Raw())
[1]415 }
416 } else {
417 switch token {
418 case html.StartTagToken:
419 tag, _ := decoder.TagName()
420 if inArray(tag, UNSAFE_ELEMENTS) {
421 unsafeElements = append(unsafeElements, tag)
422 }
423
424 case html.EndTagToken:
425 tag, _ := decoder.TagName()
426 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
427 unsafeElements = unsafeElements[:len(unsafeElements)-1]
428 }
429 }
430 }
431 }
432}
433
[13]434func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
435 exclude := false
436 for _, attr := range attrs {
437 attrName := attr[0]
438 attrValue := attr[1]
439 if bytes.Equal(attrName, []byte("rel")) {
440 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
441 exclude = true
442 break
443 }
444 }
445 if bytes.Equal(attrName, []byte("as")) {
446 if bytes.Equal(attrValue, []byte("script")) {
447 exclude = true
448 break
449 }
450 }
451 }
452
453 if !exclude {
454 out.Write([]byte("<link"))
455 for _, attr := range attrs {
456 sanitizeAttr(rc, out, attr[0], attr[1])
457 }
458 out.Write([]byte(">"))
459 }
460}
461
[9]462func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]463 var http_equiv []byte
464 var content []byte
465
466 for _, attr := range attrs {
467 attrName := attr[0]
468 attrValue := attr[1]
469 if bytes.Equal(attrName, []byte("http-equiv")) {
470 http_equiv = bytes.ToLower(attrValue)
471 }
472 if bytes.Equal(attrName, []byte("content")) {
473 content = attrValue
474 }
475 }
476
[14]477 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
478 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
479 contentUrl := content[urlIndex+4:]
480 if uri, err := proxifyURI(rc, string(contentUrl)); err == nil {
481 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]482 }
483 } else {
[9]484 sanitizeAttrs(rc, out, attrs)
[1]485 }
486
487}
488
[9]489func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]490 for _, attr := range attrs {
[9]491 sanitizeAttr(rc, out, attr[0], attr[1])
[1]492 }
493}
494
[9]495func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
[1]496 if inArray(attrName, SAFE_ATTRIBUTES) {
[9]497 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
[1]498 return
499 }
500 switch string(attrName) {
501 case "src", "href", "action":
502 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
[9]503 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]504 } else {
505 log.Println("cannot proxify uri:", attrValue)
506 }
507 case "style":
[9]508 fmt.Fprintf(out, " %s=\"", attrName)
509 sanitizeCSS(rc, out, attrValue)
510 out.Write([]byte("\""))
[1]511 }
512}
513
// mergeURIs completes the reference u2 in place using the base URL u1.
//
//   - A missing scheme is taken from u1.
//   - If u2 already names a host, or is an opaque URL such as "mailto:...",
//     nothing else changes.
//   - Otherwise u2 gets u1's host. An empty path becomes u1's path (with
//     u1's query when u2 has none). A relative path is joined onto the
//     directory of u1's path, keeping a trailing slash.
//
// A nil u1 or u2 is a no-op.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" || u2.Opaque != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		// Query-only or empty reference: it addresses the base document.
		u2.Path = u1.Path
		if u2.RawQuery == "" {
			u2.RawQuery = u1.RawQuery
		}
	case u2.Path[0] != '/':
		// Directory part of the base path; "/" when the base path is
		// empty or contains no slash.
		baseDir := "/"
		if i := strings.LastIndexByte(u1.Path, '/'); i >= 0 {
			baseDir = u1.Path[:i+1]
		}
		merged := path.Join(baseDir, u2.Path)
		// path.Join strips a trailing slash, which is significant in URLs.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(merged, "/") {
			merged += "/"
		}
		u2.Path = merged
	}
}
525
526func proxifyURI(rc *RequestConfig, uri string) (string, error) {
527 // TODO check malicious data: - e.g. data:script
528 if strings.HasPrefix(uri, "data:") {
529 return uri, nil
530 }
531
532 if len(uri) > 0 && uri[0] == '#' {
533 return uri, nil
534 }
535
536 u, err := url.Parse(uri)
537 if err != nil {
538 return "", err
539 }
540 mergeURIs(rc.baseURL, u)
541
542 uri = u.String()
543
544 if rc.Key == nil {
545 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
546 }
547 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
548}
549
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if string(a[i]) == string(b) {
			return true
		}
	}
	return false
}
558
// hash returns the HMAC-SHA256 of msg under key as a lower-case hex string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
564
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	return hmac.Equal(received, expected.Sum(nil))
}
576
[11]577func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
[1]578 ctx.SetContentType("text/html")
579 ctx.Write([]byte(`<!doctype html>
580<head>
[11]581<title>MortyProxy</title>
582<style>
583body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
584input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
585a { text-decoration: none; #2980b9; }
586h1, h2 { font-weight: 200; margin-bottom: 2rem; }
587h1 { font-size: 3em; }
588.footer { position: absolute; bottom: 2em; width: 100%; }
589.footer p { font-size: 0.8em; }
590
591</style>
[1]592</head>
[11]593<body>
594 <h1>MortyProxy</h1>`))
595 if err != nil {
596 ctx.SetStatusCode(404)
597 log.Println("error:", err)
598 ctx.Write([]byte("<h2>Error: "))
599 ctx.Write([]byte(html.EscapeString(err.Error())))
600 ctx.Write([]byte("</h2>"))
601 } else {
602 ctx.SetStatusCode(200)
603 }
[1]604 if p.Key == nil {
605 ctx.Write([]byte(`
606<form action="post">
607 Visit url: <input placeholder="https://url.." name="mortyurl" />
608 <input type="submit" value="go" />
609</form>`))
[11]610 } else {
611 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]612 }
613 ctx.Write([]byte(`
[11]614<div class="footer">
615 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
616 <a href="https://github.com/asciimoo/morty">view on github</a>
617 </p>
618</div>
[1]619</body>
620</html>`))
621}
622
623func main() {
624
[2]625 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]626 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[4]627 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]628 flag.Parse()
629
[4]630 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]631
632 if *key != "" {
633 p.Key = []byte(*key)
634 }
635
636 log.Println("listening on", *listen)
637
638 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
639 log.Fatal("Error in ListenAndServe:", err)
640 }
641}
Note: See TracBrowser for help on using the repository browser.