source: code/trunk/morty.go@ 72

Last change on this file since 72 was 72, checked in by asciimoo, 8 years ago

[fix] hide morty popup ++ add link to source

File size: 27.2 KB
Rev Line
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[68]16 "path/filepath"
[1]17 "regexp"
18 "strings"
[4]19 "time"
[60]20 "unicode/utf8"
[1]21
22 "github.com/valyala/fasthttp"
23 "golang.org/x/net/html"
[45]24 "golang.org/x/net/html/charset"
25 "golang.org/x/text/encoding"
[68]26
27 "github.com/asciimoo/morty/contenttype"
[1]28)
29
// Parser states used by sanitizeHTML to decide how a text token is emitted.
const (
	// STATE_DEFAULT : text is copied to the output as is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE : text is the content of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT : text is the content of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
35
36var CLIENT *fasthttp.Client = &fasthttp.Client{
37 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
38}
39
// CSS_URL_REGEXP matches a CSS url(...) reference.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote; sanitizeCSS rewrites submatch 2.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
[1]41
[68]42var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
43 // html
44 contenttype.NewFilterEquals("text", "html", ""),
45 contenttype.NewFilterEquals("application", "xhtml", "xml"),
46 // css
47 contenttype.NewFilterEquals("text", "css", ""),
48 // images
49 contenttype.NewFilterEquals("image", "gif", ""),
50 contenttype.NewFilterEquals("image", "png", ""),
51 contenttype.NewFilterEquals("image", "jpeg", ""),
52 contenttype.NewFilterEquals("image", "pjpeg", ""),
53 contenttype.NewFilterEquals("image", "webp", ""),
54 contenttype.NewFilterEquals("image", "tiff", ""),
55 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
56 contenttype.NewFilterEquals("image", "bmp", ""),
57 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
58 // fonts
59 contenttype.NewFilterEquals("application", "font-otf", ""),
60 contenttype.NewFilterEquals("application", "font-ttf", ""),
61 contenttype.NewFilterEquals("application", "font-woff", ""),
62 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
63})
64
65var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
66 // texts
67 contenttype.NewFilterEquals("text", "csv", ""),
68 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
69 contenttype.NewFilterEquals("text", "plain", ""),
70 // API
71 contenttype.NewFilterEquals("application", "json", ""),
72 // Documents
73 contenttype.NewFilterEquals("application", "x-latex", ""),
74 contenttype.NewFilterEquals("application", "pdf", ""),
75 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
76 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
77 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
78 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
79 // Compressed archives
80 contenttype.NewFilterEquals("application", "zip", ""),
81 contenttype.NewFilterEquals("application", "gzip", ""),
82 contenttype.NewFilterEquals("application", "x-compressed", ""),
83 contenttype.NewFilterEquals("application", "x-gtar", ""),
84 contenttype.NewFilterEquals("application", "x-compress", ""),
85 // Generic binary
86 contenttype.NewFilterEquals("application", "octet-stream", ""),
87})
88
89var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
90 "charset": true,
91}
92
// byteStrings converts a list of strings to the [][]byte form expected by inArray.
func byteStrings(values ...string) [][]byte {
	list := make([][]byte, 0, len(values))
	for _, v := range values {
		list = append(list, []byte(v))
	}
	return list
}

// UNSAFE_ELEMENTS lists the elements that sanitizeHTML removes together with
// their whole content. ("iframe" is deliberately not part of the list.)
var UNSAFE_ELEMENTS [][]byte = byteStrings(
	"applet",
	"canvas",
	"embed",
	//"iframe",
	"math",
	"script",
	"svg",
)

// SAFE_ATTRIBUTES lists the attributes that sanitizeAttr copies to the output
// with an HTML-escaped value and no further rewriting.
var SAFE_ATTRIBUTES [][]byte = byteStrings(
	"abbr",
	"accesskey",
	"align",
	"alt",
	"as",
	"autocomplete",
	"charset",
	"checked",
	"class",
	"content",
	"contenteditable",
	"contextmenu",
	"dir",
	"for",
	"height",
	"hidden",
	"hreflang",
	"id",
	"lang",
	"media",
	"method",
	"name",
	"nowrap",
	"placeholder",
	"property",
	"rel",
	"spellcheck",
	"tabindex",
	"target",
	"title",
	"translate",
	"type",
	"value",
	"width",
)

// SELF_CLOSING_ELEMENTS lists the elements that have no end tag; an unsafe
// element of this list is dropped without waiting for a closing tag.
var SELF_CLOSING_ELEMENTS [][]byte = byteStrings(
	"area",
	"base",
	"br",
	"col",
	"embed",
	"hr",
	"img",
	"input",
	"keygen",
	"link",
	"meta",
	"param",
	"source",
	"track",
	"wbr",
)

// LINK_REL_SAFE_VALUES lists the values of <link rel="..."> kept by sanitizeLinkTag.
var LINK_REL_SAFE_VALUES [][]byte = byteStrings(
	"alternate",
	"archives",
	"author",
	"copyright",
	"first",
	"help",
	"icon",
	"index",
	"last",
	"license",
	"manifest",
	"next",
	"pingback",
	"prev",
	"publisher",
	"search",
	"shortcut icon",
	"stylesheet",
	"up",
)

// LINK_HTTP_EQUIV_SAFE_VALUES lists the values of <meta http-equiv="..."> kept by sanitizeMetaTag.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = byteStrings(
	// X-UA-Compatible will be added automatically, so it can be skipped
	"date",
	"last-modified",
	"refresh", // URL rewrite
	// "location", TODO URL rewrite
	"content-language",
)
188
// Proxy holds the settings shared by every request served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to validate the "mortyhash" parameter;
	// a nil Key disables the validation.
	Key []byte

	// RequestTimeout is the timeout passed to CLIENT.DoTimeout for the upstream request.
	RequestTimeout time.Duration
}
193
// RequestConfig carries the per-document settings used while rewriting URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign the rewritten URIs (nil: no signature).
	Key []byte

	// BaseURL is the URL that relative references are resolved against.
	// sanitizeHTML replaces it when the document declares a <base href="...">.
	BaseURL *url.URL
}
198
// HTML_FORM_EXTENSION is the format string of the two hidden inputs that
// sanitizeHTML appends to every <form>: the first verb receives the target
// URL ("mortyurl"), the second one its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" />` +
	`<input type="hidden" name="mortyhash" value="%s" />`
[1]200
// HTML_BODY_EXTENSION is the format string written by sanitizeHTML when it
// meets a </body> end tag: a small fixed "mortyheader" banner, a hidden
// checkbox used to hide the banner without JavaScript, and the related CSS.
// The single %s verb receives the URL of the original page; every literal
// percent sign of the CSS is doubled (%%) because the text goes through fmt.Fprintf.
[1]201var HTML_BODY_EXTENSION string = `
[72]202<input type="checkbox" id="mortytoggle" autocomplete="off" />
[1]203<div id="mortyheader">
[72]204 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
[1]205</div>
206<style>
[67]207#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
208#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
209#mortyheader p { padding: 0 0 0.7em 0; display: block; }
210#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
211#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
[1]212input[type=checkbox]#mortytoggle { display: none; }
[72]213input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
[1]214</style>
215`
216
// HTML_HEAD_CONTENT_TYPE is written by sanitizeHTML right after the <head>
// start tag: it declares the UTF-8 charset, the IE compatibility mode and a
// "no-referrer" policy.
var HTML_HEAD_CONTENT_TYPE = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n" +
	"<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n" +
	"<meta name=\"referrer\" content=\"no-referrer\">\n"
[45]221
// MORTY_HTML_PAGE_START is the beginning of every page generated by the proxy
// itself (main page, error pages, exit page): doctype, <head> with the style
// sheet, and the opening of the "container" block with the page title.
// It is written as plain text (no fmt formatting), so percent signs are not doubled.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
244
// MORTY_HTML_PAGE_END closes the pages generated by the proxy itself: it ends
// the "container" block opened by MORTY_HTML_PAGE_START, adds the footer
// (short description and link to the project) and closes <body> and <html>.
245var MORTY_HTML_PAGE_END string = `
246 </div>
247 <div class="footer">
248 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
249 <a href="https://github.com/asciimoo/morty">view on github</a>
250 </p>
251 </div>
252</body>
253</html>`
254
// FAVICON_BYTES is the PNG image served for /favicon.ico (see appRequestHandler).
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
//
// The base64 text is stored without its trailing "=" padding, so it has to be
// decoded with the unpadded alphabet: base64.StdEncoding rejects the last
// (incomplete) group and returns a PNG truncated by two bytes. A decoding
// error means the embedded constant is broken, which is a programming error,
// hence the fatal log.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	favicon, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		log.Fatalln("invalid embedded favicon:", err)
	}
	FAVICON_BYTES = favicon
}
262
[1]263func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]264
265 if appRequestHandler(ctx) {
266 return
267 }
268
[1]269 requestHash := popRequestParam(ctx, []byte("mortyhash"))
270
271 requestURI := popRequestParam(ctx, []byte("mortyurl"))
272
273 if requestURI == nil {
[35]274 p.serveMainPage(ctx, 200, nil)
[1]275 return
276 }
277
278 if p.Key != nil {
279 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]280 // HTTP status code 403 : Forbidden
281 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]282 return
283 }
284 }
285
286 parsedURI, err := url.Parse(string(requestURI))
287
[11]288 if err != nil {
[35]289 // HTTP status code 500 : Internal Server Error
290 p.serveMainPage(ctx, 500, err)
[1]291 return
292 }
293
[69]294 // Serve an intermediate page for protocols other than HTTP(S)
295 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
296 p.serveExitMortyPage(ctx, parsedURI)
297 return
298 }
299
[1]300 req := fasthttp.AcquireRequest()
301 defer fasthttp.ReleaseRequest(req)
[12]302 req.SetConnectionClose()
[1]303
[47]304 requestURIStr := string(requestURI)
[1]305
[47]306 log.Println("getting", requestURIStr)
[1]307
[47]308 req.SetRequestURI(requestURIStr)
[62]309 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]310
311 resp := fasthttp.AcquireResponse()
312 defer fasthttp.ReleaseResponse(resp)
313
314 req.Header.SetMethodBytes(ctx.Method())
315 if ctx.IsPost() || ctx.IsPut() {
316 req.SetBody(ctx.PostBody())
317 }
318
[11]319 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
320
321 if err != nil {
[35]322 if err == fasthttp.ErrTimeout {
323 // HTTP status code 504 : Gateway Time-Out
324 p.serveMainPage(ctx, 504, err)
325 } else {
326 // HTTP status code 500 : Internal Server Error
327 p.serveMainPage(ctx, 500, err)
328 }
[1]329 return
330 }
331
332 if resp.StatusCode() != 200 {
333 switch resp.StatusCode() {
[7]334 case 301, 302, 303, 307, 308:
[1]335 loc := resp.Header.Peek("Location")
336 if loc != nil {
[23]337 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
[60]338 url, err := rc.ProxifyURI(loc)
[1]339 if err == nil {
340 ctx.SetStatusCode(resp.StatusCode())
341 ctx.Response.Header.Add("Location", url)
342 log.Println("redirect to", string(loc))
343 return
344 }
345 }
346 }
[47]347 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]348 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]349 return
350 }
351
[68]352 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]353
[68]354 if contentTypeBytes == nil {
[35]355 // HTTP status code 503 : Service Unavailable
356 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]357 return
358 }
359
[68]360 contentTypeString := string(contentTypeBytes)
361
362 // decode Content-Type header
363 contentType, error := contenttype.ParseContentType(contentTypeString)
364 if error != nil {
365 // HTTP status code 503 : Service Unavailable
366 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]367 return
368 }
369
[68]370 // content-disposition
371 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]372
[68]373 // check content type
374 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
375 // it is not a usual content type
376 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
377 // force attachment for allowed content type
378 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
379 } else {
380 // deny access to forbidden content type
381 // HTTP status code 403 : Forbidden
382 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
383 return
384 }
385 }
386
387 // HACK : replace */xhtml by text/html
388 if contentType.SubType == "xhtml" {
389 contentType.TopLevelType = "text"
390 contentType.SubType = "html"
391 contentType.Suffix = ""
392 }
393
394 // conversion to UTF-8
[1]395 var responseBody []byte
396
[68]397 if contentType.TopLevelType == "text" {
398 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]399 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
400 responseBody, err = e.NewDecoder().Bytes(resp.Body())
401 if err != nil {
402 // HTTP status code 503 : Service Unavailable
403 p.serveMainPage(ctx, 503, err)
404 return
405 }
406 } else {
407 responseBody = resp.Body()
[1]408 }
[68]409 // update the charset or specify it
410 contentType.Parameters["charset"] = "UTF-8"
[1]411 } else {
412 responseBody = resp.Body()
413 }
414
[68]415 //
416 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]417
[68]418 // set the content type
419 ctx.SetContentType(contentType.String())
420
421 // output according to MIME type
[1]422 switch {
[68]423 case contentType.SubType == "css" && contentType.Suffix == "":
[23]424 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]425 case contentType.SubType == "html" && contentType.Suffix == "":
[23]426 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]427 default:
[68]428 if contentDispositionBytes != nil {
429 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]430 }
[1]431 ctx.Write(responseBody)
432 }
433}
434
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the upstream header value (may be nil); its
// parameters are kept when it can be parsed and dropped otherwise. When no
// "filename" parameter is present, the last element of the URL path is used,
// unless the path has no usable last element ("", "/").
// The returned value is never empty.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// an unparsable header is ignored: only the parameters matter here
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	// mime.ParseMediaType lower-cases the parameter names, so the lookup and
	// the assignment both use the lower-case key
	if _, fileNameDefined := params["filename"]; !fileNameDefined && url != nil {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// filepath.Base returns "." for an empty path and the separator for a root path
		if name != "." && name != "/" && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	value := mime.FormatMediaType("attachment", params)
	if value == "" {
		// FormatMediaType refuses parameters that violate the standard:
		// the attachment disposition must be kept even then
		value = "attachment"
	}
	return []byte(value)
}
457
[10]458func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]459 // serve robots.txt
[10]460 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
461 ctx.SetContentType("text/plain")
462 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
463 return true
464 }
[11]465
[67]466 // server favicon.ico
467 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
468 ctx.SetContentType("image/png")
469 ctx.Write(FAVICON_BYTES)
470 return true
471 }
472
[10]473 return false
474}
475
[1]476func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
477 param := ctx.QueryArgs().PeekBytes(paramName)
478
479 if param == nil {
480 param = ctx.PostArgs().PeekBytes(paramName)
481 if param != nil {
482 ctx.PostArgs().DelBytes(paramName)
483 }
484 } else {
485 ctx.QueryArgs().DelBytes(paramName)
486 }
487
488 return param
489}
490
[9]491func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]492 // TODO
493
494 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
495
496 if urlSlices == nil {
[9]497 out.Write(css)
[1]498 return
499 }
500
501 startIndex := 0
502
503 for _, s := range urlSlices {
[15]504 urlStart := s[4]
505 urlEnd := s[5]
[1]506
[60]507 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]508 out.Write(css[startIndex:urlStart])
509 out.Write([]byte(uri))
[1]510 startIndex = urlEnd
511 } else {
[36]512 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]513 }
514 }
515 if startIndex < len(css) {
[9]516 out.Write(css[startIndex:len(css)])
[1]517 }
518}
519
[9]520func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]521 r := bytes.NewReader(htmlDoc)
522 decoder := html.NewTokenizer(r)
523 decoder.AllowCDATA(true)
524
525 unsafeElements := make([][]byte, 0, 8)
526 state := STATE_DEFAULT
527 for {
528 token := decoder.Next()
529 if token == html.ErrorToken {
530 err := decoder.Err()
531 if err != io.EOF {
532 log.Println("failed to parse HTML:")
533 }
534 break
535 }
536
537 if len(unsafeElements) == 0 {
538
539 switch token {
540 case html.StartTagToken, html.SelfClosingTagToken:
541 tag, hasAttrs := decoder.TagName()
542 safe := !inArray(tag, UNSAFE_ELEMENTS)
543 if !safe {
544 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
545 var unsafeTag []byte = make([]byte, len(tag))
546 copy(unsafeTag, tag)
547 unsafeElements = append(unsafeElements, unsafeTag)
548 }
549 break
550 }
[38]551 if bytes.Equal(tag, []byte("base")) {
552 for {
553 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]554 if bytes.Equal(attrName, []byte("href")) {
555 parsedURI, err := url.Parse(string(attrValue))
556 if err == nil {
557 rc.BaseURL = parsedURI
558 }
[38]559 }
560 if !moreAttr {
561 break
562 }
563 }
564 break
565 }
[1]566 if bytes.Equal(tag, []byte("noscript")) {
567 state = STATE_IN_NOSCRIPT
568 break
569 }
570 var attrs [][][]byte
571 if hasAttrs {
572 for {
573 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]574 attrs = append(attrs, [][]byte{
575 attrName,
576 attrValue,
577 []byte(html.EscapeString(string(attrValue))),
578 })
[1]579 if !moreAttr {
580 break
581 }
582 }
[13]583 }
584 if bytes.Equal(tag, []byte("link")) {
585 sanitizeLinkTag(rc, out, attrs)
586 break
587 }
588
[45]589 if bytes.Equal(tag, []byte("meta")) {
590 sanitizeMetaTag(rc, out, attrs)
591 break
592 }
593
[13]594 fmt.Fprintf(out, "<%s", tag)
595
596 if hasAttrs {
[45]597 sanitizeAttrs(rc, out, attrs)
[1]598 }
[13]599
[1]600 if token == html.SelfClosingTagToken {
[9]601 fmt.Fprintf(out, " />")
[1]602 } else {
[9]603 fmt.Fprintf(out, ">")
[1]604 if bytes.Equal(tag, []byte("style")) {
605 state = STATE_IN_STYLE
606 }
607 }
[13]608
[45]609 if bytes.Equal(tag, []byte("head")) {
[46]610 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]611 }
612
[1]613 if bytes.Equal(tag, []byte("form")) {
614 var formURL *url.URL
615 for _, attr := range attrs {
616 if bytes.Equal(attr[0], []byte("action")) {
617 formURL, _ = url.Parse(string(attr[1]))
[28]618 formURL = mergeURIs(rc.BaseURL, formURL)
[1]619 break
620 }
621 }
622 if formURL == nil {
[23]623 formURL = rc.BaseURL
[1]624 }
[2]625 urlStr := formURL.String()
626 var key string
627 if rc.Key != nil {
628 key = hash(urlStr, rc.Key)
629 }
[9]630 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]631
632 }
633
634 case html.EndTagToken:
635 tag, _ := decoder.TagName()
636 writeEndTag := true
637 switch string(tag) {
638 case "body":
[23]639 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]640 case "style":
641 state = STATE_DEFAULT
642 case "noscript":
643 state = STATE_DEFAULT
644 writeEndTag = false
645 }
646 // skip noscript tags - only the tag, not the content, because javascript is sanitized
647 if writeEndTag {
[9]648 fmt.Fprintf(out, "</%s>", tag)
[1]649 }
650
651 case html.TextToken:
652 switch state {
653 case STATE_DEFAULT:
[9]654 fmt.Fprintf(out, "%s", decoder.Raw())
[1]655 case STATE_IN_STYLE:
[9]656 sanitizeCSS(rc, out, decoder.Raw())
[1]657 case STATE_IN_NOSCRIPT:
[9]658 sanitizeHTML(rc, out, decoder.Raw())
[1]659 }
660
[62]661 case html.CommentToken:
662 // ignore comment. TODO : parse IE conditional comment
663
664 case html.DoctypeToken:
[9]665 out.Write(decoder.Raw())
[1]666 }
667 } else {
668 switch token {
669 case html.StartTagToken:
670 tag, _ := decoder.TagName()
671 if inArray(tag, UNSAFE_ELEMENTS) {
672 unsafeElements = append(unsafeElements, tag)
673 }
674
675 case html.EndTagToken:
676 tag, _ := decoder.TagName()
677 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
678 unsafeElements = unsafeElements[:len(unsafeElements)-1]
679 }
680 }
681 }
682 }
683}
684
[13]685func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
686 exclude := false
687 for _, attr := range attrs {
688 attrName := attr[0]
689 attrValue := attr[1]
690 if bytes.Equal(attrName, []byte("rel")) {
[46]691 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]692 exclude = true
693 break
694 }
695 }
696 if bytes.Equal(attrName, []byte("as")) {
697 if bytes.Equal(attrValue, []byte("script")) {
698 exclude = true
699 break
700 }
701 }
702 }
703
704 if !exclude {
705 out.Write([]byte("<link"))
706 for _, attr := range attrs {
[21]707 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]708 }
709 out.Write([]byte(">"))
710 }
711}
712
[45]713func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]714 var http_equiv []byte
715 var content []byte
716
717 for _, attr := range attrs {
718 attrName := attr[0]
719 attrValue := attr[1]
720 if bytes.Equal(attrName, []byte("http-equiv")) {
721 http_equiv = bytes.ToLower(attrValue)
[46]722 // exclude some <meta http-equiv="..." ..>
723 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
724 return
725 }
[1]726 }
727 if bytes.Equal(attrName, []byte("content")) {
728 content = attrValue
729 }
[45]730 if bytes.Equal(attrName, []byte("charset")) {
731 // exclude <meta charset="...">
732 return
733 }
[1]734 }
735
[45]736 out.Write([]byte("<meta"))
[14]737 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
738 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
739 contentUrl := content[urlIndex+4:]
[36]740 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]741 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]742 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]743 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]744 }
745 }
746 // output proxify result
[60]747 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]748 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]749 }
750 } else {
[46]751 if len(http_equiv) > 0 {
752 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
753 }
[9]754 sanitizeAttrs(rc, out, attrs)
[1]755 }
[45]756 out.Write([]byte(">"))
[1]757}
758
[9]759func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]760 for _, attr := range attrs {
[21]761 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]762 }
763}
764
[21]765func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]766 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]767 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]768 return
769 }
770 switch string(attrName) {
771 case "src", "href", "action":
[60]772 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]773 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]774 } else {
[36]775 log.Println("cannot proxify uri:", string(attrValue))
[1]776 }
777 case "style":
[21]778 cssAttr := bytes.NewBuffer(nil)
779 sanitizeCSS(rc, cssAttr, attrValue)
780 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]781 }
782}
783
// mergeURIs resolves the reference u2 against the base u1.
// A nil u2 yields u1 and a nil u1 yields u2, so a missing URL never panics.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	switch {
	case u2 == nil:
		return u1
	case u1 == nil:
		return u2
	}
	return u1.ResolveReference(u2)
}
790
// sanitizeURI normalizes a URI found in a document.
//
// Bytes below or equal to 32 (space and control characters) are removed from
// both ends of the URI and from inside its scheme, and the scheme is lower-cased.
// It returns the normalized URI and the scheme including its trailing ":"
// ("" when the URI has no scheme).
// The normalized scheme is written back into uri, so the caller's slice is
// modified; only the small scheme buffer is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	end := len(uri)
	for end > 0 && uri[end-1] <= 32 {
		end--
	}
	uri = uri[:end]

	head := make([]byte, 0, 10) // cleaned bytes seen up to the end of the scan
	firstIndex := 0             // index of the first byte that is kept
	firstSeen := false
	colonIndex := -1 // index of the ":" ending the scheme, -1 without scheme

scan:
	for i, c := range uri {
		// ignore special characters and space (c <= 32)
		if c <= 32 {
			continue
		}

		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		head = append(head, c)

		if !firstSeen {
			firstIndex, firstSeen = i, true
		}

		switch c {
		case ':':
			// end of the scheme
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex == -1 {
		// scheme NOT found
		return uri[firstIndex:], ""
	}

	// scheme found: copy the cleaned, lower-cased scheme right before the
	// remaining part of the URI
	schemeStart := colonIndex - len(head) + 1
	copy(uri[schemeStart:], head)
	return uri[schemeStart:], string(head)
}
842
843func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
844 // sanitize URI
845 uri, scheme := sanitizeURI(uri)
846
[28]847 // remove javascript protocol
[60]848 if scheme == "javascript:" {
[28]849 return "", nil
850 }
[57]851
[1]852 // TODO check malicious data: - e.g. data:script
[60]853 if scheme == "data:" {
[61]854 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
855 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
856 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
857 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
858 bytes.HasPrefix(uri, []byte("data:image/webp")) {
859 // should be safe
860 return string(uri), nil
861 } else {
862 // unsafe data
863 return "", nil
864 }
[1]865 }
866
[57]867 // parse the uri
[60]868 u, err := url.Parse(string(uri))
[1]869 if err != nil {
870 return "", err
871 }
[57]872
873 // get the fragment (with the prefix "#")
874 fragment := ""
875 if len(u.Fragment) > 0 {
876 fragment = "#" + u.Fragment
877 }
878
879 // reset the fragment: it is not included in the mortyurl
880 u.Fragment = ""
881
882 // merge the URI with the document URI
[28]883 u = mergeURIs(rc.BaseURL, u)
[1]884
[57]885 // simple internal link ?
886 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
887 if u.Scheme == rc.BaseURL.Scheme &&
888 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
889 u.Host == rc.BaseURL.Host &&
890 u.Path == rc.BaseURL.Path &&
891 u.RawQuery == rc.BaseURL.RawQuery {
892 // the fragment is the only difference between the document URI and the uri parameter
893 return fragment, nil
894 }
895
896 // return full URI and fragment (if not empty)
[60]897 morty_uri := u.String()
[1]898
899 if rc.Key == nil {
[60]900 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]901 }
[60]902 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]903}
904
// inArray reports whether the byte slice b is equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
913
// hash returns the hexadecimal HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
919
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri computed with key. A hashMsg that is not valid hexadecimal is logged
// and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(received, expected)
}
931
[69]932func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
933 ctx.SetContentType("text/html")
934 ctx.SetStatusCode(403)
935 ctx.Write([]byte(MORTY_HTML_PAGE_START))
936 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
937 ctx.Write([]byte("<p>Following</p><p><a href=\""))
938 ctx.Write([]byte(html.EscapeString(uri.String())))
939 ctx.Write([]byte("\" rel=\"noreferrer\">"))
940 ctx.Write([]byte(html.EscapeString(uri.String())))
941 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
942 ctx.Write([]byte(MORTY_HTML_PAGE_END))
943}
944
[35]945func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]946 ctx.SetContentType("text/html; charset=UTF-8")
[35]947 ctx.SetStatusCode(statusCode)
[69]948 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]949 if err != nil {
950 log.Println("error:", err)
951 ctx.Write([]byte("<h2>Error: "))
952 ctx.Write([]byte(html.EscapeString(err.Error())))
953 ctx.Write([]byte("</h2>"))
954 }
[1]955 if p.Key == nil {
956 ctx.Write([]byte(`
[36]957 <form action="post">
958 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
959 <input type="submit" value="go" />
960 </form>`))
[11]961 } else {
962 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]963 }
[69]964 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]965}
966
967func main() {
968
[2]969 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]970 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[24]971 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[4]972 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]973 flag.Parse()
974
[24]975 if *ipv6 {
976 CLIENT.Dial = fasthttp.DialDualStack
977 }
978
[4]979 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]980
981 if *key != "" {
982 p.Key = []byte(*key)
983 }
984
985 log.Println("listening on", *listen)
986
987 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
988 log.Fatal("Error in ListenAndServe:", err)
989 }
990}
Note: See TracBrowser for help on using the repository browser.