source: code/trunk/morty.go@ 65

Last change on this file since 65 was 65, checked in by asciimoo, 8 years ago

Merge pull request #45 from dalf/mimetype

[enh] parse and filter Content-Type.

File size: 25.7 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
[64]13 "mime"
[1]14 "net/url"
[64]15 "path/filepath"
[1]16 "regexp"
17 "strings"
[4]18 "time"
[60]19 "unicode/utf8"
[1]20
21 "github.com/valyala/fasthttp"
22 "golang.org/x/net/html"
[45]23 "golang.org/x/net/html/charset"
24 "golang.org/x/text/encoding"
[63]25
26 "github.com/dalf/morty/contenttype"
[1]27)
28
// Parser states used by sanitizeHTML to decide how a text token is emitted.
const (
	// STATE_DEFAULT: text tokens are written to the output unchanged.
	STATE_DEFAULT int = 0
	// STATE_IN_STYLE: the tokenizer is inside a <style> element; text tokens
	// are passed through sanitizeCSS.
	STATE_IN_STYLE int = 1
	// STATE_IN_NOSCRIPT: the tokenizer is inside a <noscript> element; text
	// tokens are re-parsed with sanitizeHTML.
	STATE_IN_NOSCRIPT int = 2
)
34
// CLIENT is the shared fasthttp client used by RequestHandler to fetch the
// requested document. MaxResponseBodySize is set to 10 * 1024 * 1024 bytes.
var CLIENT *fasthttp.Client = &fasthttp.Client{
	MaxResponseBodySize: 10 * 1024 * 1024, // 10M
}

// CSS_URL_REGEXP matches CSS url(...) references. Capture group 2 is the URL
// itself (without the optional surrounding quotes); sanitizeCSS reads its
// bounds from submatch indexes 4 and 5.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]40
// ALLOWED_CONTENTTYPE_FILTER accepts the content types that RequestHandler
// serves without forcing a download. A response whose content type is rejected
// by this filter is checked against ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER next.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
63
[64]64var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
65 // texts
66 contenttype.NewFilterEquals("text", "csv", ""),
67 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
68 contenttype.NewFilterEquals("text", "plain", ""),
69 // API
70 contenttype.NewFilterEquals("application", "json", ""),
71 // Documents
72 contenttype.NewFilterEquals("application", "x-latex", ""),
73 contenttype.NewFilterEquals("application", "pdf", ""),
74 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
75 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
76 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
77 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
78 // Compressed archives
79 contenttype.NewFilterEquals("application", "zip", ""),
80 contenttype.NewFilterEquals("application", "gzip", ""),
81 contenttype.NewFilterEquals("application", "x-compressed", ""),
82 contenttype.NewFilterEquals("application", "x-gtar", ""),
83 contenttype.NewFilterEquals("application", "x-compress", ""),
84 // Generic binary
85 contenttype.NewFilterEquals("application", "octet-stream", ""),
86})
87
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameter names
// handed to contentType.FilterParameters by RequestHandler before the header
// is sent to the client; only "charset" is listed.
// NOTE(review): presumably every other parameter is dropped — confirm against
// the contenttype package.
var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
	"charset": true,
}
91
// UNSAFE_ELEMENTS lists the element names that sanitizeHTML removes from the
// output. Unless the element is also in SELF_CLOSING_ELEMENTS, everything up
// to its matching end tag is dropped as well.
var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
101
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value. Attributes not listed here are only
// emitted when sanitizeAttr has a dedicated rule for them (src, href, action,
// style); everything else is dropped.
var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
138
// SELF_CLOSING_ELEMENTS lists elements that have no end tag. sanitizeHTML
// consults it when it meets an unsafe element: a self-closing unsafe element
// is dropped on its own, without waiting for a matching end tag.
var SELF_CLOSING_ELEMENTS [][]byte = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
156
// LINK_REL_SAFE_VALUES lists the rel attribute values for which
// sanitizeLinkTag keeps a <link> element. The whole attribute value is
// compared byte for byte against these entries, which is why the two-token
// value "shortcut icon" has its own entry.
var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
178
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element; a <meta> with any other
// http-equiv value is dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
187
// Proxy holds the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" request parameter.
	// When nil, requests are not verified and the main page offers a URL form.
	Key []byte
	// RequestTimeout is the timeout passed to CLIENT.DoTimeout for the
	// upstream request.
	RequestTimeout time.Duration
}

// RequestConfig carries the per-document state needed while rewriting URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// BaseURL is the URL that relative references are resolved against. It
	// starts as the requested URL and is replaced by sanitizeHTML when the
	// document contains a <base href="..."> that parses.
	BaseURL *url.URL
}
197
// HTML_FORM_EXTENSION is the fmt template written right after every <form>
// start tag. Its two verbs receive the form target URL and its hash, which
// come back as the "mortyurl" and "mortyhash" parameters on submission.
var HTML_FORM_EXTENSION string = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

// HTML_BODY_EXTENSION is the fmt template written just before </body>: a
// banner linking to the original page. Its single %s verb receives the
// document URL; literal percent signs are doubled because it is a format
// string.
var HTML_BODY_EXTENSION string = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; padding: 12px 12px 12px 0; margin: 0; box-sizing: content-box; top: 15%%; left: 0; max-width: 140px; color: #444; overflow: hidden; z-index: 110000; font-size: 12px; line-height: normal; }
#mortyheader a { color: #3498db; font-weight: bold; }
#mortyheader p { padding: 0 0 0.7em 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 12px !important; font-family: sans !important; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag. It
// declares UTF-8, matching the charset RequestHandler sets on text responses.
// It contains no format verbs.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
`
[45]219
[1]220func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]221
222 if appRequestHandler(ctx) {
223 return
224 }
225
[1]226 requestHash := popRequestParam(ctx, []byte("mortyhash"))
227
228 requestURI := popRequestParam(ctx, []byte("mortyurl"))
229
230 if requestURI == nil {
[35]231 p.serveMainPage(ctx, 200, nil)
[1]232 return
233 }
234
235 if p.Key != nil {
236 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]237 // HTTP status code 403 : Forbidden
238 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]239 return
240 }
241 }
242
243 parsedURI, err := url.Parse(string(requestURI))
244
[18]245 if strings.HasSuffix(parsedURI.Host, ".onion") {
[35]246 // HTTP status code 501 : Not Implemented
247 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
[18]248 return
249 }
250
[11]251 if err != nil {
[35]252 // HTTP status code 500 : Internal Server Error
253 p.serveMainPage(ctx, 500, err)
[1]254 return
255 }
256
257 req := fasthttp.AcquireRequest()
258 defer fasthttp.ReleaseRequest(req)
[12]259 req.SetConnectionClose()
[1]260
[47]261 requestURIStr := string(requestURI)
[1]262
[47]263 log.Println("getting", requestURIStr)
[1]264
[47]265 req.SetRequestURI(requestURIStr)
[62]266 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]267
268 resp := fasthttp.AcquireResponse()
269 defer fasthttp.ReleaseResponse(resp)
270
271 req.Header.SetMethodBytes(ctx.Method())
272 if ctx.IsPost() || ctx.IsPut() {
273 req.SetBody(ctx.PostBody())
274 }
275
[11]276 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
277
278 if err != nil {
[35]279 if err == fasthttp.ErrTimeout {
280 // HTTP status code 504 : Gateway Time-Out
281 p.serveMainPage(ctx, 504, err)
282 } else {
283 // HTTP status code 500 : Internal Server Error
284 p.serveMainPage(ctx, 500, err)
285 }
[1]286 return
287 }
288
289 if resp.StatusCode() != 200 {
290 switch resp.StatusCode() {
[7]291 case 301, 302, 303, 307, 308:
[1]292 loc := resp.Header.Peek("Location")
293 if loc != nil {
[23]294 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
[60]295 url, err := rc.ProxifyURI(loc)
[1]296 if err == nil {
297 ctx.SetStatusCode(resp.StatusCode())
298 ctx.Response.Header.Add("Location", url)
299 log.Println("redirect to", string(loc))
300 return
301 }
302 }
303 }
[47]304 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]305 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]306 return
307 }
308
[63]309 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]310
[63]311 if contentTypeBytes == nil {
[35]312 // HTTP status code 503 : Service Unavailable
313 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]314 return
315 }
316
[63]317 contentTypeString := string(contentTypeBytes)
318
319 // decode Content-Type header
320 contentType, error := contenttype.ParseContentType(contentTypeString)
321 if error != nil {
322 // HTTP status code 503 : Service Unavailable
323 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
324 return
325 }
326
[64]327 // content-disposition
328 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
329
330 // check content type
331 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
332 // it is not a usual content type
333 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
334 // force attachment for allowed content type
335 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
336 } else {
337 // deny access to forbidden content type
338 // HTTP status code 403 : Forbidden
339 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
340 return
341 }
[17]342 }
343
[63]344 // HACK : replace */xhtml by text/html
345 if contentType.SubType == "xhtml" {
346 contentType.TopLevelType = "text"
347 contentType.SubType = "html"
348 contentType.Suffix = ""
349 }
[1]350
[63]351 // conversion to UTF-8
[1]352 var responseBody []byte
353
[63]354 if contentType.TopLevelType == "text" {
355 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]356 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
357 responseBody, err = e.NewDecoder().Bytes(resp.Body())
358 if err != nil {
359 // HTTP status code 503 : Service Unavailable
360 p.serveMainPage(ctx, 503, err)
361 return
362 }
363 } else {
364 responseBody = resp.Body()
[1]365 }
[63]366 // update the charset or specify it
367 contentType.Parameters["charset"] = "UTF-8"
[1]368 } else {
369 responseBody = resp.Body()
370 }
371
[63]372 //
373 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]374
[63]375 // set the content type
376 ctx.SetContentType(contentType.String())
377
[64]378 // output according to MIME type
[1]379 switch {
[63]380 case contentType.SubType == "css" && contentType.Suffix == "":
[23]381 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[63]382 case contentType.SubType == "html" && contentType.Suffix == "":
[23]383 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]384 default:
[64]385 if contentDispositionBytes != nil {
386 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]387 }
[1]388 ctx.Write(responseBody)
389 }
390}
391
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the existing header value, or nil. Its
// parameters are kept when it parses; an unparsable value is ignored. When no
// "filename" parameter is present, the last element of url.Path is used as
// file name, unless the path has no usable last element (empty path or "/"),
// in which case no file name is emitted.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			contentDispositionParams = parsed
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so the lookup and the
	// insertion both use the lower-case key.
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		fileName := filepath.Base(url.Path)
		// filepath.Base returns "." for an empty path and the separator for a
		// path made of separators only; neither is a meaningful file name.
		if fileName != "." && fileName != string(filepath.Separator) {
			contentDispositionParams["filename"] = fileName
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
414
[10]415func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]416 // serve robots.txt
[10]417 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
418 ctx.SetContentType("text/plain")
419 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
420 return true
421 }
[11]422
[10]423 return false
424}
425
[1]426func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
427 param := ctx.QueryArgs().PeekBytes(paramName)
428
429 if param == nil {
430 param = ctx.PostArgs().PeekBytes(paramName)
431 if param != nil {
432 ctx.PostArgs().DelBytes(paramName)
433 }
434 } else {
435 ctx.QueryArgs().DelBytes(paramName)
436 }
437
438 return param
439}
440
[9]441func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]442 // TODO
443
444 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
445
446 if urlSlices == nil {
[9]447 out.Write(css)
[1]448 return
449 }
450
451 startIndex := 0
452
453 for _, s := range urlSlices {
[15]454 urlStart := s[4]
455 urlEnd := s[5]
[1]456
[60]457 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]458 out.Write(css[startIndex:urlStart])
459 out.Write([]byte(uri))
[1]460 startIndex = urlEnd
461 } else {
[36]462 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]463 }
464 }
465 if startIndex < len(css) {
[9]466 out.Write(css[startIndex:len(css)])
[1]467 }
468}
469
[9]470func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]471 r := bytes.NewReader(htmlDoc)
472 decoder := html.NewTokenizer(r)
473 decoder.AllowCDATA(true)
474
475 unsafeElements := make([][]byte, 0, 8)
476 state := STATE_DEFAULT
477 for {
478 token := decoder.Next()
479 if token == html.ErrorToken {
480 err := decoder.Err()
481 if err != io.EOF {
482 log.Println("failed to parse HTML:")
483 }
484 break
485 }
486
487 if len(unsafeElements) == 0 {
488
489 switch token {
490 case html.StartTagToken, html.SelfClosingTagToken:
491 tag, hasAttrs := decoder.TagName()
492 safe := !inArray(tag, UNSAFE_ELEMENTS)
493 if !safe {
494 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
495 var unsafeTag []byte = make([]byte, len(tag))
496 copy(unsafeTag, tag)
497 unsafeElements = append(unsafeElements, unsafeTag)
498 }
499 break
500 }
[38]501 if bytes.Equal(tag, []byte("base")) {
502 for {
503 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]504 if bytes.Equal(attrName, []byte("href")) {
505 parsedURI, err := url.Parse(string(attrValue))
506 if err == nil {
507 rc.BaseURL = parsedURI
508 }
[38]509 }
510 if !moreAttr {
511 break
512 }
513 }
514 break
515 }
[1]516 if bytes.Equal(tag, []byte("noscript")) {
517 state = STATE_IN_NOSCRIPT
518 break
519 }
520 var attrs [][][]byte
521 if hasAttrs {
522 for {
523 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]524 attrs = append(attrs, [][]byte{
525 attrName,
526 attrValue,
527 []byte(html.EscapeString(string(attrValue))),
528 })
[1]529 if !moreAttr {
530 break
531 }
532 }
[13]533 }
534 if bytes.Equal(tag, []byte("link")) {
535 sanitizeLinkTag(rc, out, attrs)
536 break
537 }
538
[45]539 if bytes.Equal(tag, []byte("meta")) {
540 sanitizeMetaTag(rc, out, attrs)
541 break
542 }
543
[13]544 fmt.Fprintf(out, "<%s", tag)
545
546 if hasAttrs {
[45]547 sanitizeAttrs(rc, out, attrs)
[1]548 }
[13]549
[1]550 if token == html.SelfClosingTagToken {
[9]551 fmt.Fprintf(out, " />")
[1]552 } else {
[9]553 fmt.Fprintf(out, ">")
[1]554 if bytes.Equal(tag, []byte("style")) {
555 state = STATE_IN_STYLE
556 }
557 }
[13]558
[45]559 if bytes.Equal(tag, []byte("head")) {
[46]560 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]561 }
562
[1]563 if bytes.Equal(tag, []byte("form")) {
564 var formURL *url.URL
565 for _, attr := range attrs {
566 if bytes.Equal(attr[0], []byte("action")) {
567 formURL, _ = url.Parse(string(attr[1]))
[28]568 formURL = mergeURIs(rc.BaseURL, formURL)
[1]569 break
570 }
571 }
572 if formURL == nil {
[23]573 formURL = rc.BaseURL
[1]574 }
[2]575 urlStr := formURL.String()
576 var key string
577 if rc.Key != nil {
578 key = hash(urlStr, rc.Key)
579 }
[9]580 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]581
582 }
583
584 case html.EndTagToken:
585 tag, _ := decoder.TagName()
586 writeEndTag := true
587 switch string(tag) {
588 case "body":
[23]589 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]590 case "style":
591 state = STATE_DEFAULT
592 case "noscript":
593 state = STATE_DEFAULT
594 writeEndTag = false
595 }
596 // skip noscript tags - only the tag, not the content, because javascript is sanitized
597 if writeEndTag {
[9]598 fmt.Fprintf(out, "</%s>", tag)
[1]599 }
600
601 case html.TextToken:
602 switch state {
603 case STATE_DEFAULT:
[9]604 fmt.Fprintf(out, "%s", decoder.Raw())
[1]605 case STATE_IN_STYLE:
[9]606 sanitizeCSS(rc, out, decoder.Raw())
[1]607 case STATE_IN_NOSCRIPT:
[9]608 sanitizeHTML(rc, out, decoder.Raw())
[1]609 }
610
[62]611 case html.CommentToken:
612 // ignore comment. TODO : parse IE conditional comment
613
614 case html.DoctypeToken:
[9]615 out.Write(decoder.Raw())
[1]616 }
617 } else {
618 switch token {
619 case html.StartTagToken:
620 tag, _ := decoder.TagName()
621 if inArray(tag, UNSAFE_ELEMENTS) {
622 unsafeElements = append(unsafeElements, tag)
623 }
624
625 case html.EndTagToken:
626 tag, _ := decoder.TagName()
627 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
628 unsafeElements = unsafeElements[:len(unsafeElements)-1]
629 }
630 }
631 }
632 }
633}
634
[13]635func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
636 exclude := false
637 for _, attr := range attrs {
638 attrName := attr[0]
639 attrValue := attr[1]
640 if bytes.Equal(attrName, []byte("rel")) {
[46]641 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]642 exclude = true
643 break
644 }
645 }
646 if bytes.Equal(attrName, []byte("as")) {
647 if bytes.Equal(attrValue, []byte("script")) {
648 exclude = true
649 break
650 }
651 }
652 }
653
654 if !exclude {
655 out.Write([]byte("<link"))
656 for _, attr := range attrs {
[21]657 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]658 }
659 out.Write([]byte(">"))
660 }
661}
662
[45]663func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]664 var http_equiv []byte
665 var content []byte
666
667 for _, attr := range attrs {
668 attrName := attr[0]
669 attrValue := attr[1]
670 if bytes.Equal(attrName, []byte("http-equiv")) {
671 http_equiv = bytes.ToLower(attrValue)
[46]672 // exclude some <meta http-equiv="..." ..>
673 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
674 return
675 }
[1]676 }
677 if bytes.Equal(attrName, []byte("content")) {
678 content = attrValue
679 }
[45]680 if bytes.Equal(attrName, []byte("charset")) {
681 // exclude <meta charset="...">
682 return
683 }
[1]684 }
685
[45]686 out.Write([]byte("<meta"))
[14]687 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
688 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
689 contentUrl := content[urlIndex+4:]
[36]690 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]691 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]692 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]693 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]694 }
695 }
696 // output proxify result
[60]697 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]698 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]699 }
700 } else {
[46]701 if len(http_equiv) > 0 {
702 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
703 }
[9]704 sanitizeAttrs(rc, out, attrs)
[1]705 }
[45]706 out.Write([]byte(">"))
[1]707}
708
[9]709func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]710 for _, attr := range attrs {
[21]711 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]712 }
713}
714
[21]715func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]716 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]717 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]718 return
719 }
720 switch string(attrName) {
721 case "src", "href", "action":
[60]722 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]723 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]724 } else {
[36]725 log.Println("cannot proxify uri:", string(attrValue))
[1]726 }
727 case "style":
[21]728 cssAttr := bytes.NewBuffer(nil)
729 sanitizeCSS(rc, cssAttr, attrValue)
730 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]731 }
732}
733
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL (see url.URL.ResolveReference).
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
737
// sanitizeURI cleans up a URI before it is parsed and returns the cleaned URI
// together with its scheme.
//
// Bytes below or equal to 32 (control characters and space) are removed from
// the end of the URI, from its beginning, and from inside the scheme; the
// scheme is lower-cased. The returned scheme includes the trailing ":" and is
// empty when the URI looks relative (a '/', '?', '\' or '#' is met before any
// ':'). The clean-up is done in place: uri is modified and the returned slice
// shares its memory.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	lowered := make([]byte, 0, 10) // kept bytes, lower-cased, up to the delimiter
	firstKept := -1                // index of the first byte above 32
	colonAt := -1                  // index of the ':' ending the scheme

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// special characters and spaces are skipped
			continue
		}
		if c < utf8.RuneSelf && c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		lowered = append(lowered, c)
		if firstKept < 0 {
			firstKept = i
		}

		switch c {
		case ':':
			// end of the scheme
			colonAt = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonAt < 0 {
		// no scheme: only the leading special characters are removed
		if firstKept < 0 {
			firstKept = 0
		}
		return uri[firstKept:], ""
	}

	// Write the compacted, lower-cased scheme so that it ends on the ':' and
	// return the URI starting at the rewritten scheme.
	schemeStart := colonAt - len(lowered) + 1
	copy(uri[schemeStart:], lowered)
	return uri[schemeStart:], string(lowered)
}
789
790func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
791 // sanitize URI
792 uri, scheme := sanitizeURI(uri)
793
[28]794 // remove javascript protocol
[60]795 if scheme == "javascript:" {
[28]796 return "", nil
797 }
[57]798
[1]799 // TODO check malicious data: - e.g. data:script
[60]800 if scheme == "data:" {
[61]801 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
802 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
803 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
804 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
805 bytes.HasPrefix(uri, []byte("data:image/webp")) {
806 // should be safe
807 return string(uri), nil
808 } else {
809 // unsafe data
810 return "", nil
811 }
[1]812 }
813
[57]814 // parse the uri
[60]815 u, err := url.Parse(string(uri))
[1]816 if err != nil {
817 return "", err
818 }
[57]819
820 // get the fragment (with the prefix "#")
821 fragment := ""
822 if len(u.Fragment) > 0 {
823 fragment = "#" + u.Fragment
824 }
825
826 // reset the fragment: it is not included in the mortyurl
827 u.Fragment = ""
828
829 // merge the URI with the document URI
[28]830 u = mergeURIs(rc.BaseURL, u)
[1]831
[57]832 // simple internal link ?
833 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
834 if u.Scheme == rc.BaseURL.Scheme &&
835 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
836 u.Host == rc.BaseURL.Host &&
837 u.Path == rc.BaseURL.Path &&
838 u.RawQuery == rc.BaseURL.RawQuery {
839 // the fragment is the only difference between the document URI and the uri parameter
840 return fragment, nil
841 }
842
843 // return full URI and fragment (if not empty)
[60]844 morty_uri := u.String()
[1]845
846 if rc.Key == nil {
[60]847 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]848 }
[60]849 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]850}
851
// inArray reports whether a contains an element whose bytes are equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
860
// hash returns the HMAC-SHA256 of msg computed with key, encoded as a
// lower-case hexadecimal string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))

	digest := signer.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
866
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri computed with key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(received, expected)
}
878
[35]879func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[1]880 ctx.SetContentType("text/html")
[35]881 ctx.SetStatusCode(statusCode)
[1]882 ctx.Write([]byte(`<!doctype html>
883<head>
[11]884<title>MortyProxy</title>
[36]885<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
[11]886<style>
[36]887html { height: 100%; }
888body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
[11]889input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
[36]890input[placeholder] { width:80%; }
[11]891a { text-decoration: none; #2980b9; }
892h1, h2 { font-weight: 200; margin-bottom: 2rem; }
893h1 { font-size: 3em; }
[36]894.container { flex:1; min-height: 100%; margin-bottom: 1em; }
895.footer { margin: 1em; }
[11]896.footer p { font-size: 0.8em; }
897</style>
[1]898</head>
[11]899<body>
[36]900 <div class="container">
901 <h1>MortyProxy</h1>
902`))
[11]903 if err != nil {
904 log.Println("error:", err)
905 ctx.Write([]byte("<h2>Error: "))
906 ctx.Write([]byte(html.EscapeString(err.Error())))
907 ctx.Write([]byte("</h2>"))
908 }
[1]909 if p.Key == nil {
910 ctx.Write([]byte(`
[36]911 <form action="post">
912 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
913 <input type="submit" value="go" />
914 </form>`))
[11]915 } else {
916 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]917 }
918 ctx.Write([]byte(`
[36]919 </div>
920 <div class="footer">
921 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
922 <a href="https://github.com/asciimoo/morty">view on github</a>
923 </p>
924 </div>
[1]925</body>
926</html>`))
927}
928
929func main() {
930
[2]931 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]932 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[24]933 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[4]934 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]935 flag.Parse()
936
[24]937 if *ipv6 {
938 CLIENT.Dial = fasthttp.DialDualStack
939 }
940
[4]941 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]942
943 if *key != "" {
944 p.Key = []byte(*key)
945 }
946
947 log.Println("listening on", *listen)
948
949 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
950 log.Fatal("Error in ListenAndServe:", err)
951 }
952}
Note: See TracBrowser for help on using the repository browser.