source: code/trunk/morty.go@ 23

Last change on this file since 23 was 23, checked in by asciimoo, 9 years ago

[mod] proxification refactor

File size: 15.8 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
[4]17 "time"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is written through unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is inside <style> and is passed to sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is inside <noscript> and is re-sanitized as HTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
[21]31 Dial: fasthttp.DialDualStack,
[1]32 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
33}
34
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening/closing quote, submatch 2 is the URL itself (sanitizeCSS
// rewrites the span of submatch 2).
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]36
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
44
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
80
// SELF_CLOSING_ELEMENTS lists tag names that never have a closing tag.
// sanitizeHTML consults it so that an unsafe element from this list is not
// pushed onto the stack of open unsafe elements.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
98
// Proxy holds the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; when nil, request
	// URLs are accepted without verification.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream client for each
	// proxied request.
	RequestTimeout time.Duration
}
103
// RequestConfig carries the per-request data needed to rewrite URLs found
// in a proxied document.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// BaseURL is the URL of the document being sanitized; relative
	// references are resolved against it.
	BaseURL *url.URL
}
108
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended
// after every <form> start tag: the first verb is the form's target URL, the
// second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]110
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// writes just before each </body> tag. Its single %s verb receives the URL
// of the original page; the literal percent sign in the CSS is escaped as %%.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
126
127func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]128
129 if appRequestHandler(ctx) {
130 return
131 }
132
[1]133 requestHash := popRequestParam(ctx, []byte("mortyhash"))
134
135 requestURI := popRequestParam(ctx, []byte("mortyurl"))
136
137 if requestURI == nil {
[11]138 p.serveMainPage(ctx, nil)
[1]139 return
140 }
141
142 if p.Key != nil {
143 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[11]144 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
[1]145 return
146 }
147 }
148
149 parsedURI, err := url.Parse(string(requestURI))
150
[18]151 if strings.HasSuffix(parsedURI.Host, ".onion") {
152 p.serveMainPage(ctx, errors.New("Tor urls are not supported yet"))
153 return
154 }
155
[11]156 if err != nil {
157 p.serveMainPage(ctx, err)
[1]158 return
159 }
160
161 req := fasthttp.AcquireRequest()
162 defer fasthttp.ReleaseRequest(req)
[12]163 req.SetConnectionClose()
[1]164
165 reqQuery := parsedURI.Query()
166 ctx.QueryArgs().VisitAll(func(key, value []byte) {
167 reqQuery.Add(string(key), string(value))
168 })
169
170 parsedURI.RawQuery = reqQuery.Encode()
171
172 uriStr := parsedURI.String()
173
174 log.Println("getting", uriStr)
175
176 req.SetRequestURI(uriStr)
177 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
178
179 resp := fasthttp.AcquireResponse()
180 defer fasthttp.ReleaseResponse(resp)
181
182 req.Header.SetMethodBytes(ctx.Method())
183 if ctx.IsPost() || ctx.IsPut() {
184 req.SetBody(ctx.PostBody())
185 }
186
[11]187 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
188
189 if err != nil {
190 p.serveMainPage(ctx, err)
[1]191 return
192 }
193
194 if resp.StatusCode() != 200 {
195 switch resp.StatusCode() {
[7]196 case 301, 302, 303, 307, 308:
[1]197 loc := resp.Header.Peek("Location")
198 if loc != nil {
[23]199 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
200 url, err := rc.ProxifyURI(string(loc))
[1]201 if err == nil {
202 ctx.SetStatusCode(resp.StatusCode())
203 ctx.Response.Header.Add("Location", url)
204 log.Println("redirect to", string(loc))
205 return
206 }
207 }
208 }
209 log.Println("invalid request:", resp.StatusCode())
210 return
211 }
212
213 contentType := resp.Header.Peek("Content-Type")
214
215 if contentType == nil {
[11]216 p.serveMainPage(ctx, errors.New("invalid content type"))
[1]217 return
218 }
219
[17]220 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
221 p.serveMainPage(ctx, errors.New("forbidden content type"))
222 return
223 }
224
[1]225 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
226
227 var responseBody []byte
228
229 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
230 var err error
231 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
[11]232 if err != nil {
233 p.serveMainPage(ctx, err)
[1]234 return
235 }
236 } else {
237 responseBody = resp.Body()
238 }
239
240 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
241
242 switch {
243 case bytes.Contains(contentType, []byte("css")):
[23]244 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]245 case bytes.Contains(contentType, []byte("html")):
[23]246 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]247 default:
248 ctx.Write(responseBody)
249 }
250}
251
[10]252func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]253 // serve robots.txt
[10]254 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
255 ctx.SetContentType("text/plain")
256 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
257 return true
258 }
[11]259
[10]260 return false
261}
262
[1]263func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
264 param := ctx.QueryArgs().PeekBytes(paramName)
265
266 if param == nil {
267 param = ctx.PostArgs().PeekBytes(paramName)
268 if param != nil {
269 ctx.PostArgs().DelBytes(paramName)
270 }
271 } else {
272 ctx.QueryArgs().DelBytes(paramName)
273 }
274
275 return param
276}
277
[9]278func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]279 // TODO
280
281 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
282
283 if urlSlices == nil {
[9]284 out.Write(css)
[1]285 return
286 }
287
288 startIndex := 0
289
290 for _, s := range urlSlices {
[15]291 urlStart := s[4]
292 urlEnd := s[5]
[1]293
[23]294 if uri, err := rc.ProxifyURI(string(css[urlStart:urlEnd])); err == nil {
[9]295 out.Write(css[startIndex:urlStart])
296 out.Write([]byte(uri))
[1]297 startIndex = urlEnd
298 } else {
299 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
300 }
301 }
302 if startIndex < len(css) {
[9]303 out.Write(css[startIndex:len(css)])
[1]304 }
305}
306
[9]307func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]308 r := bytes.NewReader(htmlDoc)
309 decoder := html.NewTokenizer(r)
310 decoder.AllowCDATA(true)
311
312 unsafeElements := make([][]byte, 0, 8)
313 state := STATE_DEFAULT
314
315 for {
316 token := decoder.Next()
317 if token == html.ErrorToken {
318 err := decoder.Err()
319 if err != io.EOF {
320 log.Println("failed to parse HTML:")
321 }
322 break
323 }
324
325 if len(unsafeElements) == 0 {
326
327 switch token {
328 case html.StartTagToken, html.SelfClosingTagToken:
329 tag, hasAttrs := decoder.TagName()
330 safe := !inArray(tag, UNSAFE_ELEMENTS)
331 if !safe {
332 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
333 var unsafeTag []byte = make([]byte, len(tag))
334 copy(unsafeTag, tag)
335 unsafeElements = append(unsafeElements, unsafeTag)
336 }
337 break
338 }
339 if bytes.Equal(tag, []byte("noscript")) {
340 state = STATE_IN_NOSCRIPT
341 break
342 }
343 var attrs [][][]byte
344 if hasAttrs {
345 for {
346 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]347 attrs = append(attrs, [][]byte{
348 attrName,
349 attrValue,
350 []byte(html.EscapeString(string(attrValue))),
351 })
[1]352 if !moreAttr {
353 break
354 }
355 }
[13]356 }
357 if bytes.Equal(tag, []byte("link")) {
358 sanitizeLinkTag(rc, out, attrs)
359 break
360 }
361
362 fmt.Fprintf(out, "<%s", tag)
363
364 if hasAttrs {
[1]365 if bytes.Equal(tag, []byte("meta")) {
[9]366 sanitizeMetaAttrs(rc, out, attrs)
[1]367 } else {
[9]368 sanitizeAttrs(rc, out, attrs)
[1]369 }
370 }
[13]371
[1]372 if token == html.SelfClosingTagToken {
[9]373 fmt.Fprintf(out, " />")
[1]374 } else {
[9]375 fmt.Fprintf(out, ">")
[1]376 if bytes.Equal(tag, []byte("style")) {
377 state = STATE_IN_STYLE
378 }
379 }
[13]380
[1]381 if bytes.Equal(tag, []byte("form")) {
382 var formURL *url.URL
383 for _, attr := range attrs {
384 if bytes.Equal(attr[0], []byte("action")) {
385 formURL, _ = url.Parse(string(attr[1]))
[23]386 mergeURIs(rc.BaseURL, formURL)
[1]387 break
388 }
389 }
390 if formURL == nil {
[23]391 formURL = rc.BaseURL
[1]392 }
[2]393 urlStr := formURL.String()
394 var key string
395 if rc.Key != nil {
396 key = hash(urlStr, rc.Key)
397 }
[9]398 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]399
400 }
401
402 case html.EndTagToken:
403 tag, _ := decoder.TagName()
404 writeEndTag := true
405 switch string(tag) {
406 case "body":
[23]407 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]408 case "style":
409 state = STATE_DEFAULT
410 case "noscript":
411 state = STATE_DEFAULT
412 writeEndTag = false
413 }
414 // skip noscript tags - only the tag, not the content, because javascript is sanitized
415 if writeEndTag {
[9]416 fmt.Fprintf(out, "</%s>", tag)
[1]417 }
418
419 case html.TextToken:
420 switch state {
421 case STATE_DEFAULT:
[9]422 fmt.Fprintf(out, "%s", decoder.Raw())
[1]423 case STATE_IN_STYLE:
[9]424 sanitizeCSS(rc, out, decoder.Raw())
[1]425 case STATE_IN_NOSCRIPT:
[9]426 sanitizeHTML(rc, out, decoder.Raw())
[1]427 }
428
429 case html.DoctypeToken, html.CommentToken:
[9]430 out.Write(decoder.Raw())
[1]431 }
432 } else {
433 switch token {
434 case html.StartTagToken:
435 tag, _ := decoder.TagName()
436 if inArray(tag, UNSAFE_ELEMENTS) {
437 unsafeElements = append(unsafeElements, tag)
438 }
439
440 case html.EndTagToken:
441 tag, _ := decoder.TagName()
442 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
443 unsafeElements = unsafeElements[:len(unsafeElements)-1]
444 }
445 }
446 }
447 }
448}
449
[13]450func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
451 exclude := false
452 for _, attr := range attrs {
453 attrName := attr[0]
454 attrValue := attr[1]
455 if bytes.Equal(attrName, []byte("rel")) {
456 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
457 exclude = true
458 break
459 }
460 }
461 if bytes.Equal(attrName, []byte("as")) {
462 if bytes.Equal(attrValue, []byte("script")) {
463 exclude = true
464 break
465 }
466 }
467 }
468
469 if !exclude {
470 out.Write([]byte("<link"))
471 for _, attr := range attrs {
[21]472 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]473 }
474 out.Write([]byte(">"))
475 }
476}
477
[9]478func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]479 var http_equiv []byte
480 var content []byte
481
482 for _, attr := range attrs {
483 attrName := attr[0]
484 attrValue := attr[1]
485 if bytes.Equal(attrName, []byte("http-equiv")) {
486 http_equiv = bytes.ToLower(attrValue)
487 }
488 if bytes.Equal(attrName, []byte("content")) {
489 content = attrValue
490 }
491 }
492
[14]493 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
494 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
495 contentUrl := content[urlIndex+4:]
[23]496 if uri, err := rc.ProxifyURI(string(contentUrl)); err == nil {
[14]497 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]498 }
499 } else {
[9]500 sanitizeAttrs(rc, out, attrs)
[1]501 }
502
503}
504
[9]505func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]506 for _, attr := range attrs {
[21]507 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]508 }
509}
510
[21]511func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]512 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]513 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]514 return
515 }
516 switch string(attrName) {
517 case "src", "href", "action":
[23]518 if uri, err := rc.ProxifyURI(string(attrValue)); err == nil {
[9]519 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]520 } else {
521 log.Println("cannot proxify uri:", attrValue)
522 }
523 case "style":
[21]524 cssAttr := bytes.NewBuffer(nil)
525 sanitizeCSS(rc, cssAttr, attrValue)
526 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]527 }
528}
529
// mergeURIs completes the (possibly relative) reference u2 in place using
// the base URL u1:
//
//   - a missing scheme is taken from u1;
//   - a missing host is taken from u1, and in that case a relative path is
//     resolved against the directory of u1's path, while an empty path
//     becomes u1's path.
//
// A u2 that already has a host is left untouched apart from the scheme.
// The call is a no-op when either argument is nil.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		// A reference such as "?q=1" addresses the base document itself.
		u2.Path = u1.Path
	case u2.Path[0] != '/':
		// Directory of the base path; "/" when the base has no path at
		// all (e.g. "http://example.com").
		baseDir := "/"
		if i := strings.LastIndexByte(u1.Path, '/'); i >= 0 {
			baseDir = u1.Path[:i+1]
		}
		joined := path.Join(baseDir, u2.Path)
		// path.Join cleans away a trailing slash, which is significant
		// in URLs.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(joined, "/") {
			joined += "/"
		}
		u2.Path = joined
	}
}
541
[23]542func (rc *RequestConfig) ProxifyURI(uri string) (string, error) {
[1]543 // TODO check malicious data: - e.g. data:script
544 if strings.HasPrefix(uri, "data:") {
545 return uri, nil
546 }
547
548 if len(uri) > 0 && uri[0] == '#' {
549 return uri, nil
550 }
551
552 u, err := url.Parse(uri)
553 if err != nil {
554 return "", err
555 }
[23]556 mergeURIs(rc.BaseURL, u)
[1]557
558 uri = u.String()
559
560 if rc.Key == nil {
561 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
562 }
563 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
564}
565
// inArray reports whether a contains an element byte-wise equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
574
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))

	sum := mac.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
580
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected. The
// final comparison uses hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	mac := hmac.New(sha256.New, key)
	mac.Write(uri)
	expected := mac.Sum(nil)

	return hmac.Equal(received, expected)
}
592
[11]593func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
[1]594 ctx.SetContentType("text/html")
595 ctx.Write([]byte(`<!doctype html>
596<head>
[11]597<title>MortyProxy</title>
598<style>
599body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
600input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
601a { text-decoration: none; #2980b9; }
602h1, h2 { font-weight: 200; margin-bottom: 2rem; }
603h1 { font-size: 3em; }
604.footer { position: absolute; bottom: 2em; width: 100%; }
605.footer p { font-size: 0.8em; }
606
607</style>
[1]608</head>
[11]609<body>
610 <h1>MortyProxy</h1>`))
611 if err != nil {
612 ctx.SetStatusCode(404)
613 log.Println("error:", err)
614 ctx.Write([]byte("<h2>Error: "))
615 ctx.Write([]byte(html.EscapeString(err.Error())))
616 ctx.Write([]byte("</h2>"))
617 } else {
618 ctx.SetStatusCode(200)
619 }
[1]620 if p.Key == nil {
621 ctx.Write([]byte(`
622<form action="post">
623 Visit url: <input placeholder="https://url.." name="mortyurl" />
624 <input type="submit" value="go" />
625</form>`))
[11]626 } else {
627 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]628 }
629 ctx.Write([]byte(`
[11]630<div class="footer">
631 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
632 <a href="https://github.com/asciimoo/morty">view on github</a>
633 </p>
634</div>
[1]635</body>
636</html>`))
637}
638
639func main() {
640
[2]641 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]642 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[4]643 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]644 flag.Parse()
645
[4]646 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]647
648 if *key != "" {
649 p.Key = []byte(*key)
650 }
651
652 log.Println("listening on", *listen)
653
654 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
655 log.Fatal("Error in ListenAndServe:", err)
656 }
657}
Note: See TracBrowser for help on using the repository browser.