source: code/trunk/morty.go@ 98

Last change on this file since 98 was 98, checked in by mathieu.brunot, 6 years ago

:bug: Simplify DEBUG var init

Signed-off-by: mathieu.brunot <mathieu.brunot@…>

File size: 27.8 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[78]16 "os"
[68]17 "path/filepath"
[1]18 "regexp"
19 "strings"
[4]20 "time"
[60]21 "unicode/utf8"
[1]22
23 "github.com/valyala/fasthttp"
24 "golang.org/x/net/html"
[45]25 "golang.org/x/net/html/charset"
26 "golang.org/x/text/encoding"
[68]27
28 "github.com/asciimoo/morty/contenttype"
[1]29)
30
// Parser states used by sanitizeHTML to decide how text tokens are emitted.
const (
	// STATE_DEFAULT : text tokens are copied to the output as they are.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE : text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT : text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT
)
36
// VERSION is the version string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG enables verbose logging when the DEBUG environment variable is
// exactly "true". It has to be a variable: os.Getenv is a function call and
// cannot appear in a constant initializer.
var DEBUG = os.Getenv("DEBUG") == "true"
[96]40
[1]41var CLIENT *fasthttp.Client = &fasthttp.Client{
42 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
43}
44
// CSS_URL_REGEXP matches CSS url(...) references.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]46
[68]47var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
48 // html
49 contenttype.NewFilterEquals("text", "html", ""),
50 contenttype.NewFilterEquals("application", "xhtml", "xml"),
51 // css
52 contenttype.NewFilterEquals("text", "css", ""),
53 // images
54 contenttype.NewFilterEquals("image", "gif", ""),
55 contenttype.NewFilterEquals("image", "png", ""),
56 contenttype.NewFilterEquals("image", "jpeg", ""),
57 contenttype.NewFilterEquals("image", "pjpeg", ""),
58 contenttype.NewFilterEquals("image", "webp", ""),
59 contenttype.NewFilterEquals("image", "tiff", ""),
60 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
61 contenttype.NewFilterEquals("image", "bmp", ""),
62 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]63 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]64 // fonts
65 contenttype.NewFilterEquals("application", "font-otf", ""),
66 contenttype.NewFilterEquals("application", "font-ttf", ""),
67 contenttype.NewFilterEquals("application", "font-woff", ""),
68 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
69})
70
71var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
72 // texts
73 contenttype.NewFilterEquals("text", "csv", ""),
74 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
75 contenttype.NewFilterEquals("text", "plain", ""),
76 // API
77 contenttype.NewFilterEquals("application", "json", ""),
78 // Documents
79 contenttype.NewFilterEquals("application", "x-latex", ""),
80 contenttype.NewFilterEquals("application", "pdf", ""),
81 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
82 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
85 // Compressed archives
86 contenttype.NewFilterEquals("application", "zip", ""),
87 contenttype.NewFilterEquals("application", "gzip", ""),
88 contenttype.NewFilterEquals("application", "x-compressed", ""),
89 contenttype.NewFilterEquals("application", "x-gtar", ""),
90 contenttype.NewFilterEquals("application", "x-compress", ""),
91 // Generic binary
92 contenttype.NewFilterEquals("application", "octet-stream", ""),
93})
94
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters kept by
// RequestHandler; it is passed to contentType.FilterParameters.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
98
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
108
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only HTML escaping applied to the value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
145
// SELF_CLOSING_ELEMENTS lists the tag names sanitizeHTML treats as having no
// end tag: an unsafe element from this list is not tracked as "open".
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
163
// LINK_REL_SAFE_VALUES lists the rel values a <link> tag may carry;
// sanitizeLinkTag drops the whole tag for any other value.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
185
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values
// sanitizeMetaTag lets through; a <meta> with any other value is dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
194
// Proxy holds the settings shared by all requests served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; nil disables the check.
	Key []byte
	// RequestTimeout is the timeout passed to the HTTP client for each
	// outgoing request.
	RequestTimeout time.Duration
}
199
// RequestConfig carries what is needed to rewrite the URIs of one document.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URIs; nil means unsigned.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	BaseURL *url.URL
}
204
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first %s is the form target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]206
// HTML_BODY_EXTENSION is the format string for the banner written before
// </body>; its single %s is the URL of the original page. Literal percent
// signs in the CSS are doubled because the text goes through fmt.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
  <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
222
// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag: it
// declares the UTF-8 charset, the IE compatibility mode and the no-referrer
// policy.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]227
// MORTY_HTML_PAGE_START is the opening part of the pages generated by the
// proxy itself (main page, error page, exit page); the caller writes the
// page body after it and then MORTY_HTML_PAGE_END.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
250
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer shown on every page generated by the proxy.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
260
// FAVICON_BYTES is the PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
func init() {
	// The literal has no trailing "=" padding, so it must be decoded with the
	// unpadded encoding: base64.StdEncoding reports an error on it and stops
	// before the last two bytes of the image.
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// cannot happen with the literal above; keep the favicon empty rather
		// than serving a truncated image
		log.Println("cannot decode embedded favicon:", err)
		return
	}
	FAVICON_BYTES = decoded
}
268
[1]269func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]270
271 if appRequestHandler(ctx) {
272 return
273 }
274
[1]275 requestHash := popRequestParam(ctx, []byte("mortyhash"))
276
277 requestURI := popRequestParam(ctx, []byte("mortyurl"))
278
279 if requestURI == nil {
[35]280 p.serveMainPage(ctx, 200, nil)
[1]281 return
282 }
283
284 if p.Key != nil {
285 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]286 // HTTP status code 403 : Forbidden
287 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]288 return
289 }
290 }
291
[97]292 parsedURI, err := url.Parse(string(requestURI))
[1]293
[11]294 if err != nil {
[35]295 // HTTP status code 500 : Internal Server Error
296 p.serveMainPage(ctx, 500, err)
[1]297 return
298 }
299
[69]300 // Serve an intermediate page for protocols other than HTTP(S)
301 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
302 p.serveExitMortyPage(ctx, parsedURI)
303 return
304 }
305
[1]306 req := fasthttp.AcquireRequest()
307 defer fasthttp.ReleaseRequest(req)
[12]308 req.SetConnectionClose()
[1]309
[47]310 requestURIStr := string(requestURI)
[1]311
[97]312 if DEBUG {
313 log.Println("getting", requestURIStr)
314 }
[1]315
[47]316 req.SetRequestURI(requestURIStr)
[62]317 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]318
319 resp := fasthttp.AcquireResponse()
320 defer fasthttp.ReleaseResponse(resp)
321
322 req.Header.SetMethodBytes(ctx.Method())
323 if ctx.IsPost() || ctx.IsPut() {
324 req.SetBody(ctx.PostBody())
325 }
326
[11]327 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
328
329 if err != nil {
[35]330 if err == fasthttp.ErrTimeout {
331 // HTTP status code 504 : Gateway Time-Out
332 p.serveMainPage(ctx, 504, err)
333 } else {
334 // HTTP status code 500 : Internal Server Error
335 p.serveMainPage(ctx, 500, err)
336 }
[1]337 return
338 }
339
340 if resp.StatusCode() != 200 {
341 switch resp.StatusCode() {
[7]342 case 301, 302, 303, 307, 308:
[1]343 loc := resp.Header.Peek("Location")
344 if loc != nil {
[97]345 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
346 url, err := rc.ProxifyURI(loc)
347 if err == nil {
348 ctx.SetStatusCode(resp.StatusCode())
349 ctx.Response.Header.Add("Location", url)
350 if DEBUG {
351 log.Println("redirect to", string(loc))
[96]352 }
[1]353 return
354 }
355 }
356 }
[47]357 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]358 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]359 return
360 }
361
[68]362 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]363
[68]364 if contentTypeBytes == nil {
[35]365 // HTTP status code 503 : Service Unavailable
366 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]367 return
368 }
369
[68]370 contentTypeString := string(contentTypeBytes)
371
372 // decode Content-Type header
373 contentType, error := contenttype.ParseContentType(contentTypeString)
374 if error != nil {
375 // HTTP status code 503 : Service Unavailable
376 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]377 return
378 }
379
[68]380 // content-disposition
381 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]382
[68]383 // check content type
384 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
385 // it is not a usual content type
386 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
387 // force attachment for allowed content type
388 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
389 } else {
390 // deny access to forbidden content type
391 // HTTP status code 403 : Forbidden
392 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
393 return
394 }
395 }
396
397 // HACK : replace */xhtml by text/html
398 if contentType.SubType == "xhtml" {
399 contentType.TopLevelType = "text"
400 contentType.SubType = "html"
401 contentType.Suffix = ""
402 }
403
404 // conversion to UTF-8
[1]405 var responseBody []byte
406
[68]407 if contentType.TopLevelType == "text" {
408 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]409 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
410 responseBody, err = e.NewDecoder().Bytes(resp.Body())
411 if err != nil {
412 // HTTP status code 503 : Service Unavailable
413 p.serveMainPage(ctx, 503, err)
414 return
415 }
416 } else {
417 responseBody = resp.Body()
[1]418 }
[68]419 // update the charset or specify it
420 contentType.Parameters["charset"] = "UTF-8"
[1]421 } else {
422 responseBody = resp.Body()
423 }
424
[68]425 //
426 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]427
[68]428 // set the content type
429 ctx.SetContentType(contentType.String())
430
431 // output according to MIME type
[1]432 switch {
[68]433 case contentType.SubType == "css" && contentType.Suffix == "":
[23]434 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]435 case contentType.SubType == "html" && contentType.Suffix == "":
[23]436 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]437 default:
[68]438 if contentDispositionBytes != nil {
439 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]440 }
[1]441 ctx.Write(responseBody)
442 }
443}
444
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes are kept when the header can be
// parsed; a missing or malformed header is treated as having no parameters.
// When no "filename" parameter is present, the last element of url.Path is
// used as the file name.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		if _, params, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			contentDispositionParams = params
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so the key that is
	// looked up and the key that is written must both be "filename"
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		contentDispositionParams["filename"] = filepath.Base(url.Path)
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
467
[10]468func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]469 // serve robots.txt
[10]470 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
471 ctx.SetContentType("text/plain")
472 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
473 return true
474 }
[11]475
[67]476 // server favicon.ico
477 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
478 ctx.SetContentType("image/png")
479 ctx.Write(FAVICON_BYTES)
480 return true
481 }
482
[10]483 return false
484}
485
[1]486func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
487 param := ctx.QueryArgs().PeekBytes(paramName)
488
489 if param == nil {
490 param = ctx.PostArgs().PeekBytes(paramName)
491 if param != nil {
492 ctx.PostArgs().DelBytes(paramName)
493 }
494 } else {
495 ctx.QueryArgs().DelBytes(paramName)
496 }
497
498 return param
499}
500
[9]501func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]502 // TODO
503
504 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
505
506 if urlSlices == nil {
[9]507 out.Write(css)
[1]508 return
509 }
510
511 startIndex := 0
512
513 for _, s := range urlSlices {
[15]514 urlStart := s[4]
515 urlEnd := s[5]
[1]516
[60]517 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]518 out.Write(css[startIndex:urlStart])
519 out.Write([]byte(uri))
[1]520 startIndex = urlEnd
[97]521 } else if DEBUG {
[36]522 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]523 }
524 }
525 if startIndex < len(css) {
[9]526 out.Write(css[startIndex:len(css)])
[1]527 }
528}
529
[9]530func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]531 r := bytes.NewReader(htmlDoc)
532 decoder := html.NewTokenizer(r)
533 decoder.AllowCDATA(true)
534
535 unsafeElements := make([][]byte, 0, 8)
536 state := STATE_DEFAULT
537 for {
538 token := decoder.Next()
539 if token == html.ErrorToken {
540 err := decoder.Err()
541 if err != io.EOF {
[97]542 log.Println("failed to parse HTML")
[1]543 }
544 break
545 }
546
547 if len(unsafeElements) == 0 {
548
549 switch token {
550 case html.StartTagToken, html.SelfClosingTagToken:
551 tag, hasAttrs := decoder.TagName()
552 safe := !inArray(tag, UNSAFE_ELEMENTS)
553 if !safe {
554 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
555 var unsafeTag []byte = make([]byte, len(tag))
556 copy(unsafeTag, tag)
557 unsafeElements = append(unsafeElements, unsafeTag)
558 }
559 break
560 }
[38]561 if bytes.Equal(tag, []byte("base")) {
562 for {
563 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]564 if bytes.Equal(attrName, []byte("href")) {
565 parsedURI, err := url.Parse(string(attrValue))
566 if err == nil {
567 rc.BaseURL = parsedURI
568 }
[38]569 }
570 if !moreAttr {
571 break
572 }
573 }
574 break
575 }
[1]576 if bytes.Equal(tag, []byte("noscript")) {
577 state = STATE_IN_NOSCRIPT
578 break
579 }
580 var attrs [][][]byte
581 if hasAttrs {
582 for {
583 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]584 attrs = append(attrs, [][]byte{
585 attrName,
586 attrValue,
587 []byte(html.EscapeString(string(attrValue))),
588 })
[1]589 if !moreAttr {
590 break
591 }
592 }
[13]593 }
594 if bytes.Equal(tag, []byte("link")) {
595 sanitizeLinkTag(rc, out, attrs)
596 break
597 }
598
[45]599 if bytes.Equal(tag, []byte("meta")) {
600 sanitizeMetaTag(rc, out, attrs)
601 break
602 }
603
[13]604 fmt.Fprintf(out, "<%s", tag)
605
606 if hasAttrs {
[45]607 sanitizeAttrs(rc, out, attrs)
[1]608 }
[13]609
[1]610 if token == html.SelfClosingTagToken {
[9]611 fmt.Fprintf(out, " />")
[1]612 } else {
[9]613 fmt.Fprintf(out, ">")
[1]614 if bytes.Equal(tag, []byte("style")) {
615 state = STATE_IN_STYLE
616 }
617 }
[13]618
[45]619 if bytes.Equal(tag, []byte("head")) {
[46]620 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]621 }
622
[1]623 if bytes.Equal(tag, []byte("form")) {
624 var formURL *url.URL
625 for _, attr := range attrs {
626 if bytes.Equal(attr[0], []byte("action")) {
627 formURL, _ = url.Parse(string(attr[1]))
[28]628 formURL = mergeURIs(rc.BaseURL, formURL)
[1]629 break
630 }
631 }
632 if formURL == nil {
[23]633 formURL = rc.BaseURL
[1]634 }
[2]635 urlStr := formURL.String()
636 var key string
637 if rc.Key != nil {
638 key = hash(urlStr, rc.Key)
639 }
[9]640 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]641
642 }
643
644 case html.EndTagToken:
645 tag, _ := decoder.TagName()
646 writeEndTag := true
647 switch string(tag) {
648 case "body":
[23]649 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]650 case "style":
651 state = STATE_DEFAULT
652 case "noscript":
653 state = STATE_DEFAULT
654 writeEndTag = false
655 }
656 // skip noscript tags - only the tag, not the content, because javascript is sanitized
657 if writeEndTag {
[9]658 fmt.Fprintf(out, "</%s>", tag)
[1]659 }
660
661 case html.TextToken:
662 switch state {
663 case STATE_DEFAULT:
[9]664 fmt.Fprintf(out, "%s", decoder.Raw())
[1]665 case STATE_IN_STYLE:
[9]666 sanitizeCSS(rc, out, decoder.Raw())
[1]667 case STATE_IN_NOSCRIPT:
[9]668 sanitizeHTML(rc, out, decoder.Raw())
[1]669 }
670
[62]671 case html.CommentToken:
672 // ignore comment. TODO : parse IE conditional comment
673
674 case html.DoctypeToken:
[9]675 out.Write(decoder.Raw())
[1]676 }
677 } else {
678 switch token {
679 case html.StartTagToken:
680 tag, _ := decoder.TagName()
681 if inArray(tag, UNSAFE_ELEMENTS) {
682 unsafeElements = append(unsafeElements, tag)
683 }
684
685 case html.EndTagToken:
686 tag, _ := decoder.TagName()
687 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
688 unsafeElements = unsafeElements[:len(unsafeElements)-1]
689 }
690 }
691 }
692 }
693}
694
[13]695func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
696 exclude := false
697 for _, attr := range attrs {
698 attrName := attr[0]
699 attrValue := attr[1]
700 if bytes.Equal(attrName, []byte("rel")) {
[46]701 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]702 exclude = true
703 break
704 }
705 }
706 if bytes.Equal(attrName, []byte("as")) {
707 if bytes.Equal(attrValue, []byte("script")) {
708 exclude = true
709 break
710 }
711 }
712 }
713
714 if !exclude {
715 out.Write([]byte("<link"))
716 for _, attr := range attrs {
[21]717 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]718 }
719 out.Write([]byte(">"))
720 }
721}
722
[45]723func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]724 var http_equiv []byte
725 var content []byte
726
727 for _, attr := range attrs {
728 attrName := attr[0]
729 attrValue := attr[1]
730 if bytes.Equal(attrName, []byte("http-equiv")) {
731 http_equiv = bytes.ToLower(attrValue)
[46]732 // exclude some <meta http-equiv="..." ..>
733 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
734 return
735 }
[1]736 }
737 if bytes.Equal(attrName, []byte("content")) {
738 content = attrValue
739 }
[45]740 if bytes.Equal(attrName, []byte("charset")) {
741 // exclude <meta charset="...">
742 return
743 }
[1]744 }
745
[45]746 out.Write([]byte("<meta"))
[14]747 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
748 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
749 contentUrl := content[urlIndex+4:]
[36]750 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]751 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]752 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]753 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]754 }
755 }
756 // output proxify result
[60]757 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]758 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]759 }
760 } else {
[46]761 if len(http_equiv) > 0 {
762 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
763 }
[9]764 sanitizeAttrs(rc, out, attrs)
[1]765 }
[45]766 out.Write([]byte(">"))
[1]767}
768
[9]769func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]770 for _, attr := range attrs {
[21]771 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]772 }
773}
774
[21]775func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]776 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]777 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]778 return
779 }
780 switch string(attrName) {
781 case "src", "href", "action":
[60]782 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]783 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]784 } else if DEBUG {
[36]785 log.Println("cannot proxify uri:", string(attrValue))
[1]786 }
787 case "style":
[21]788 cssAttr := bytes.NewBuffer(nil)
789 sanitizeCSS(rc, cssAttr, attrValue)
790 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]791 }
792}
793
// mergeURIs resolves u2 against u1 and returns the result.
// When u2 is nil, u1 is returned as it is.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
800
// sanitizeURI cleans uri and extracts its scheme.
//
// Bytes below or equal to 32 (space and control characters) are removed at
// the beginning and at the end of the URI, and also inside the scheme; the
// scheme is lower-cased. The first result is the cleaned URI, the second is
// the scheme including its trailing ":" or "" when the URI has no scheme.
//
// The cleaned scheme is written back into uri, so that the only allocation
// is the one of the scheme.
func sanitizeURI(uri []byte) ([]byte, string) {
	var (
		start      = 0     // index of the first byte above 32
		startKnown = false // whether start has been set
		colonAt    = -1    // index of the ":" ending the scheme
		scheme     = bytes.NewBuffer(make([]byte, 0, 10))
	)

	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	// scan byte by byte until the end of the scheme, or until a byte that
	// cannot be part of a scheme
scan:
	for idx, b := range uri {
		// ignore special characters and space
		if b <= 32 {
			continue
		}

		// collect the lower-cased byte
		if 'A' <= b && b <= 'Z' {
			b += 'a' - 'A'
		}
		scheme.WriteByte(b)

		if !startKnown {
			start, startKnown = idx, true
		}

		switch b {
		case ':':
			// end of the scheme
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// special case : most probably a relative URI
			break scan
		}
	}

	if colonAt == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// scheme found : put the cleaned scheme right before the rest of the URI,
	// ending on the ":" byte
	schemeStart := colonAt - scheme.Len() + 1
	copy(uri[schemeStart:], scheme.Bytes())
	return uri[schemeStart:], scheme.String()
}
852
853func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
854 // sanitize URI
855 uri, scheme := sanitizeURI(uri)
856
[28]857 // remove javascript protocol
[60]858 if scheme == "javascript:" {
[28]859 return "", nil
860 }
[57]861
[1]862 // TODO check malicious data: - e.g. data:script
[60]863 if scheme == "data:" {
[61]864 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
865 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
866 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
867 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
868 bytes.HasPrefix(uri, []byte("data:image/webp")) {
869 // should be safe
870 return string(uri), nil
871 } else {
872 // unsafe data
873 return "", nil
874 }
[1]875 }
876
[57]877 // parse the uri
[60]878 u, err := url.Parse(string(uri))
[1]879 if err != nil {
880 return "", err
881 }
[57]882
883 // get the fragment (with the prefix "#")
884 fragment := ""
885 if len(u.Fragment) > 0 {
886 fragment = "#" + u.Fragment
887 }
888
889 // reset the fragment: it is not included in the mortyurl
890 u.Fragment = ""
891
892 // merge the URI with the document URI
[28]893 u = mergeURIs(rc.BaseURL, u)
[1]894
[57]895 // simple internal link ?
896 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
897 if u.Scheme == rc.BaseURL.Scheme &&
898 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
899 u.Host == rc.BaseURL.Host &&
900 u.Path == rc.BaseURL.Path &&
901 u.RawQuery == rc.BaseURL.RawQuery {
902 // the fragment is the only difference between the document URI and the uri parameter
903 return fragment, nil
904 }
905
906 // return full URI and fragment (if not empty)
[60]907 morty_uri := u.String()
[1]908
909 if rc.Key == nil {
[60]910 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]911 }
[60]912 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]913}
914
// inArray reports whether b is equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
923
// hash returns the HMAC-SHA256 of msg computed with key, hex encoded.
func hash(msg string, key []byte) string {
	digest := hmac.New(sha256.New, key)
	digest.Write([]byte(msg))
	sum := digest.Sum(nil)
	return hex.EncodeToString(sum)
}
929
930func verifyRequestURI(uri, hashMsg, key []byte) bool {
931 h := make([]byte, hex.DecodedLen(len(hashMsg)))
932 _, err := hex.Decode(h, hashMsg)
933 if err != nil {
[97]934 if DEBUG {
935 log.Println("hmac error:", err)
936 }
[1]937 return false
938 }
939 mac := hmac.New(sha256.New, key)
940 mac.Write(uri)
941 return hmac.Equal(h, mac.Sum(nil))
942}
943
[69]944func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
945 ctx.SetContentType("text/html")
946 ctx.SetStatusCode(403)
947 ctx.Write([]byte(MORTY_HTML_PAGE_START))
948 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
949 ctx.Write([]byte("<p>Following</p><p><a href=\""))
950 ctx.Write([]byte(html.EscapeString(uri.String())))
951 ctx.Write([]byte("\" rel=\"noreferrer\">"))
952 ctx.Write([]byte(html.EscapeString(uri.String())))
953 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
954 ctx.Write([]byte(MORTY_HTML_PAGE_END))
955}
956
[35]957func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]958 ctx.SetContentType("text/html; charset=UTF-8")
[35]959 ctx.SetStatusCode(statusCode)
[69]960 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]961 if err != nil {
[97]962 if DEBUG {
963 log.Println("error:", err)
964 }
[11]965 ctx.Write([]byte("<h2>Error: "))
966 ctx.Write([]byte(html.EscapeString(err.Error())))
967 ctx.Write([]byte("</h2>"))
968 }
[1]969 if p.Key == nil {
970 ctx.Write([]byte(`
[36]971 <form action="post">
972 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
973 <input type="submit" value="go" />
974 </form>`))
[11]975 } else {
976 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]977 }
[69]978 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]979}
980
981func main() {
[78]982 default_listen_addr := os.Getenv("MORTY_ADDRESS")
983 if default_listen_addr == "" {
984 default_listen_addr = "127.0.0.1:3000"
985 }
986 default_key := os.Getenv("MORTY_KEY")
987 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]988 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]989 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]990 version := flag.Bool("version", false, "Show version")
[4]991 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]992 flag.Parse()
993
[74]994 if *version {
995 fmt.Println(VERSION)
996 return
997 }
998
[24]999 if *ipv6 {
1000 CLIENT.Dial = fasthttp.DialDualStack
1001 }
1002
[4]1003 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1004
1005 if *key != "" {
[92]1006 var err error
1007 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1008 if err != nil {
1009 log.Fatal("Error parsing -key", err.Error())
1010 os.Exit(1)
[92]1011 }
[1]1012 }
1013
1014 log.Println("listening on", *listen)
1015
1016 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1017 log.Fatal("Error in ListenAndServe:", err)
1018 }
1019}
Note: See TracBrowser for help on using the repository browser.