source: code/trunk/morty.go@ 97

Last change on this file since 97 was 97, checked in by mathieu.brunot, 6 years ago

:sparkles: Var to enable/disable debug logs

Signed-off-by: mathieu.brunot <mathieu.brunot@…>

File size: 27.9 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[78]16 "os"
[68]17 "path/filepath"
[1]18 "regexp"
19 "strings"
[4]20 "time"
[60]21 "unicode/utf8"
[1]22
23 "github.com/valyala/fasthttp"
24 "golang.org/x/net/html"
[45]25 "golang.org/x/net/html/charset"
26 "golang.org/x/text/encoding"
[68]27
28 "github.com/asciimoo/morty/contenttype"
[1]29)
30
// Text-handling states of the sanitizeHTML tokenizer loop.
const (
	// STATE_DEFAULT copies text tokens through unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE marks text inside a <style> element; it is passed to sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT marks text inside a <noscript> element; it is sanitized again as HTML.
	STATE_IN_NOSCRIPT
)
36
// VERSION is the release identifier printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on the verbose log lines guarded by `if DEBUG` throughout this
// file. It is true only when the DEBUG environment variable is exactly "true"
// at program start; any other value (or no value) disables debug logging.
//
// Statements are not allowed at package level in Go, so the flag is computed
// directly in the initializer instead of with an if/else.
var DEBUG = os.Getenv("DEBUG") == "true"
[96]45
[1]46var CLIENT *fasthttp.Client = &fasthttp.Client{
47 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
48}
49
// CSS_URL_REGEXP matches CSS `url(...)` references.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote; the closing parenthesis is optional.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]51
[68]52var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
53 // html
54 contenttype.NewFilterEquals("text", "html", ""),
55 contenttype.NewFilterEquals("application", "xhtml", "xml"),
56 // css
57 contenttype.NewFilterEquals("text", "css", ""),
58 // images
59 contenttype.NewFilterEquals("image", "gif", ""),
60 contenttype.NewFilterEquals("image", "png", ""),
61 contenttype.NewFilterEquals("image", "jpeg", ""),
62 contenttype.NewFilterEquals("image", "pjpeg", ""),
63 contenttype.NewFilterEquals("image", "webp", ""),
64 contenttype.NewFilterEquals("image", "tiff", ""),
65 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
66 contenttype.NewFilterEquals("image", "bmp", ""),
67 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]68 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]69 // fonts
70 contenttype.NewFilterEquals("application", "font-otf", ""),
71 contenttype.NewFilterEquals("application", "font-ttf", ""),
72 contenttype.NewFilterEquals("application", "font-woff", ""),
73 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
74})
75
76var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
77 // texts
78 contenttype.NewFilterEquals("text", "csv", ""),
79 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
80 contenttype.NewFilterEquals("text", "plain", ""),
81 // API
82 contenttype.NewFilterEquals("application", "json", ""),
83 // Documents
84 contenttype.NewFilterEquals("application", "x-latex", ""),
85 contenttype.NewFilterEquals("application", "pdf", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
88 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
89 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
90 // Compressed archives
91 contenttype.NewFilterEquals("application", "zip", ""),
92 contenttype.NewFilterEquals("application", "gzip", ""),
93 contenttype.NewFilterEquals("application", "x-compressed", ""),
94 contenttype.NewFilterEquals("application", "x-gtar", ""),
95 contenttype.NewFilterEquals("application", "x-compress", ""),
96 // Generic binary
97 contenttype.NewFilterEquals("application", "octet-stream", ""),
98})
99
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters that
// survive contentType.FilterParameters in RequestHandler.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
103
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
113
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only their value HTML-escaped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
150
// SELF_CLOSING_ELEMENTS lists tag names that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe element from this list (e.g.
// embed) does not make it wait for an end tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
168
// LINK_REL_SAFE_VALUES lists the exact `rel` attribute values for which
// sanitizeLinkTag keeps a <link> element; any other value drops the element.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
190
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased `http-equiv` values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
199
type (
	// Proxy carries the process-wide settings of the proxy server.
	Proxy struct {
		// Key is the HMAC key used to verify "mortyhash"; nil disables verification.
		Key []byte
		// RequestTimeout bounds each upstream request.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state used while rewriting URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign proxified URLs; nil produces unsigned URLs.
		Key []byte
		// BaseURL is the URL against which relative references are resolved.
		BaseURL *url.URL
	}
)
209
// HTML_FORM_EXTENSION is the fmt template for the two hidden inputs that
// sanitizeHTML appends to every <form>: the first verb receives the form's
// target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]211
// HTML_BODY_EXTENSION is the fmt template for the banner that sanitizeHTML
// writes just before </body>. Its single %s verb receives the URL of the
// original page; literal percent signs in the CSS are written as %%.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
227
// HTML_HEAD_CONTENT_TYPE holds the <meta> elements that sanitizeHTML writes
// right after every opening <head> tag.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]232
// MORTY_HTML_PAGE_START is the opening part of every page generated by the
// proxy itself (main page, error pages, exit page). It is written verbatim,
// not used as a format string, and is closed by MORTY_HTML_PAGE_END.
//
// The anchor rule declares `color: #2980b9`; a bare colour value without a
// property name is an invalid declaration that browsers ignore.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
255
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer shown on every proxy-generated page.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
265
// FAVICON_BYTES holds the decoded PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
//
// The literal carries no trailing '=' padding, so it must be decoded with the
// unpadded alphabet: the padded StdEncoding rejects the final three-character
// group and returns the image without its last two bytes. The literal is a
// compile-time constant, so a decoding failure is a programming error and
// aborts start-up instead of being ignored.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		panic("morty: embedded favicon is not valid base64: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
273
[1]274func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]275
276 if appRequestHandler(ctx) {
277 return
278 }
279
[1]280 requestHash := popRequestParam(ctx, []byte("mortyhash"))
281
282 requestURI := popRequestParam(ctx, []byte("mortyurl"))
283
284 if requestURI == nil {
[35]285 p.serveMainPage(ctx, 200, nil)
[1]286 return
287 }
288
289 if p.Key != nil {
290 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]291 // HTTP status code 403 : Forbidden
292 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]293 return
294 }
295 }
296
[97]297 parsedURI, err := url.Parse(string(requestURI))
[1]298
[11]299 if err != nil {
[35]300 // HTTP status code 500 : Internal Server Error
301 p.serveMainPage(ctx, 500, err)
[1]302 return
303 }
304
[69]305 // Serve an intermediate page for protocols other than HTTP(S)
306 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
307 p.serveExitMortyPage(ctx, parsedURI)
308 return
309 }
310
[1]311 req := fasthttp.AcquireRequest()
312 defer fasthttp.ReleaseRequest(req)
[12]313 req.SetConnectionClose()
[1]314
[47]315 requestURIStr := string(requestURI)
[1]316
[97]317 if DEBUG {
318 log.Println("getting", requestURIStr)
319 }
[1]320
[47]321 req.SetRequestURI(requestURIStr)
[62]322 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]323
324 resp := fasthttp.AcquireResponse()
325 defer fasthttp.ReleaseResponse(resp)
326
327 req.Header.SetMethodBytes(ctx.Method())
328 if ctx.IsPost() || ctx.IsPut() {
329 req.SetBody(ctx.PostBody())
330 }
331
[11]332 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
333
334 if err != nil {
[35]335 if err == fasthttp.ErrTimeout {
336 // HTTP status code 504 : Gateway Time-Out
337 p.serveMainPage(ctx, 504, err)
338 } else {
339 // HTTP status code 500 : Internal Server Error
340 p.serveMainPage(ctx, 500, err)
341 }
[1]342 return
343 }
344
345 if resp.StatusCode() != 200 {
346 switch resp.StatusCode() {
[7]347 case 301, 302, 303, 307, 308:
[1]348 loc := resp.Header.Peek("Location")
349 if loc != nil {
[97]350 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
351 url, err := rc.ProxifyURI(loc)
352 if err == nil {
353 ctx.SetStatusCode(resp.StatusCode())
354 ctx.Response.Header.Add("Location", url)
355 if DEBUG {
356 log.Println("redirect to", string(loc))
[96]357 }
[1]358 return
359 }
360 }
361 }
[47]362 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]363 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]364 return
365 }
366
[68]367 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]368
[68]369 if contentTypeBytes == nil {
[35]370 // HTTP status code 503 : Service Unavailable
371 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]372 return
373 }
374
[68]375 contentTypeString := string(contentTypeBytes)
376
377 // decode Content-Type header
378 contentType, error := contenttype.ParseContentType(contentTypeString)
379 if error != nil {
380 // HTTP status code 503 : Service Unavailable
381 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]382 return
383 }
384
[68]385 // content-disposition
386 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]387
[68]388 // check content type
389 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
390 // it is not a usual content type
391 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
392 // force attachment for allowed content type
393 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
394 } else {
395 // deny access to forbidden content type
396 // HTTP status code 403 : Forbidden
397 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
398 return
399 }
400 }
401
402 // HACK : replace */xhtml by text/html
403 if contentType.SubType == "xhtml" {
404 contentType.TopLevelType = "text"
405 contentType.SubType = "html"
406 contentType.Suffix = ""
407 }
408
409 // conversion to UTF-8
[1]410 var responseBody []byte
411
[68]412 if contentType.TopLevelType == "text" {
413 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]414 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
415 responseBody, err = e.NewDecoder().Bytes(resp.Body())
416 if err != nil {
417 // HTTP status code 503 : Service Unavailable
418 p.serveMainPage(ctx, 503, err)
419 return
420 }
421 } else {
422 responseBody = resp.Body()
[1]423 }
[68]424 // update the charset or specify it
425 contentType.Parameters["charset"] = "UTF-8"
[1]426 } else {
427 responseBody = resp.Body()
428 }
429
[68]430 //
431 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]432
[68]433 // set the content type
434 ctx.SetContentType(contentType.String())
435
436 // output according to MIME type
[1]437 switch {
[68]438 case contentType.SubType == "css" && contentType.Suffix == "":
[23]439 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]440 case contentType.SubType == "html" && contentType.Suffix == "":
[23]441 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]442 default:
[68]443 if contentDispositionBytes != nil {
444 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]445 }
[1]446 ctx.Write(responseBody)
447 }
448}
449
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of an existing header value (contentDispositionBytes, may be nil)
// are preserved when the value parses; an unparsable value is discarded. When
// no "filename" parameter is present, the last element of url.Path is used.
// A URL without a usable last path element ("" or "/") adds no filename, so
// the browser picks its own default instead of receiving "." or "/".
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		// Only the parameters matter; the original disposition type is replaced.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			params = parsed
		}
	}
	if params == nil {
		params = make(map[string]string)
	}

	// mime.ParseMediaType returns lower-case parameter names, so the same
	// lower-case key is used for both the lookup and the assignment.
	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", params))
}
472
[10]473func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]474 // serve robots.txt
[10]475 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
476 ctx.SetContentType("text/plain")
477 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
478 return true
479 }
[11]480
[67]481 // server favicon.ico
482 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
483 ctx.SetContentType("image/png")
484 ctx.Write(FAVICON_BYTES)
485 return true
486 }
487
[10]488 return false
489}
490
[1]491func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
492 param := ctx.QueryArgs().PeekBytes(paramName)
493
494 if param == nil {
495 param = ctx.PostArgs().PeekBytes(paramName)
496 if param != nil {
497 ctx.PostArgs().DelBytes(paramName)
498 }
499 } else {
500 ctx.QueryArgs().DelBytes(paramName)
501 }
502
503 return param
504}
505
[9]506func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]507 // TODO
508
509 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
510
511 if urlSlices == nil {
[9]512 out.Write(css)
[1]513 return
514 }
515
516 startIndex := 0
517
518 for _, s := range urlSlices {
[15]519 urlStart := s[4]
520 urlEnd := s[5]
[1]521
[60]522 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]523 out.Write(css[startIndex:urlStart])
524 out.Write([]byte(uri))
[1]525 startIndex = urlEnd
[97]526 } else if DEBUG {
[36]527 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]528 }
529 }
530 if startIndex < len(css) {
[9]531 out.Write(css[startIndex:len(css)])
[1]532 }
533}
534
[9]535func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]536 r := bytes.NewReader(htmlDoc)
537 decoder := html.NewTokenizer(r)
538 decoder.AllowCDATA(true)
539
540 unsafeElements := make([][]byte, 0, 8)
541 state := STATE_DEFAULT
542 for {
543 token := decoder.Next()
544 if token == html.ErrorToken {
545 err := decoder.Err()
546 if err != io.EOF {
[97]547 log.Println("failed to parse HTML")
[1]548 }
549 break
550 }
551
552 if len(unsafeElements) == 0 {
553
554 switch token {
555 case html.StartTagToken, html.SelfClosingTagToken:
556 tag, hasAttrs := decoder.TagName()
557 safe := !inArray(tag, UNSAFE_ELEMENTS)
558 if !safe {
559 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
560 var unsafeTag []byte = make([]byte, len(tag))
561 copy(unsafeTag, tag)
562 unsafeElements = append(unsafeElements, unsafeTag)
563 }
564 break
565 }
[38]566 if bytes.Equal(tag, []byte("base")) {
567 for {
568 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]569 if bytes.Equal(attrName, []byte("href")) {
570 parsedURI, err := url.Parse(string(attrValue))
571 if err == nil {
572 rc.BaseURL = parsedURI
573 }
[38]574 }
575 if !moreAttr {
576 break
577 }
578 }
579 break
580 }
[1]581 if bytes.Equal(tag, []byte("noscript")) {
582 state = STATE_IN_NOSCRIPT
583 break
584 }
585 var attrs [][][]byte
586 if hasAttrs {
587 for {
588 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]589 attrs = append(attrs, [][]byte{
590 attrName,
591 attrValue,
592 []byte(html.EscapeString(string(attrValue))),
593 })
[1]594 if !moreAttr {
595 break
596 }
597 }
[13]598 }
599 if bytes.Equal(tag, []byte("link")) {
600 sanitizeLinkTag(rc, out, attrs)
601 break
602 }
603
[45]604 if bytes.Equal(tag, []byte("meta")) {
605 sanitizeMetaTag(rc, out, attrs)
606 break
607 }
608
[13]609 fmt.Fprintf(out, "<%s", tag)
610
611 if hasAttrs {
[45]612 sanitizeAttrs(rc, out, attrs)
[1]613 }
[13]614
[1]615 if token == html.SelfClosingTagToken {
[9]616 fmt.Fprintf(out, " />")
[1]617 } else {
[9]618 fmt.Fprintf(out, ">")
[1]619 if bytes.Equal(tag, []byte("style")) {
620 state = STATE_IN_STYLE
621 }
622 }
[13]623
[45]624 if bytes.Equal(tag, []byte("head")) {
[46]625 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]626 }
627
[1]628 if bytes.Equal(tag, []byte("form")) {
629 var formURL *url.URL
630 for _, attr := range attrs {
631 if bytes.Equal(attr[0], []byte("action")) {
632 formURL, _ = url.Parse(string(attr[1]))
[28]633 formURL = mergeURIs(rc.BaseURL, formURL)
[1]634 break
635 }
636 }
637 if formURL == nil {
[23]638 formURL = rc.BaseURL
[1]639 }
[2]640 urlStr := formURL.String()
641 var key string
642 if rc.Key != nil {
643 key = hash(urlStr, rc.Key)
644 }
[9]645 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]646
647 }
648
649 case html.EndTagToken:
650 tag, _ := decoder.TagName()
651 writeEndTag := true
652 switch string(tag) {
653 case "body":
[23]654 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]655 case "style":
656 state = STATE_DEFAULT
657 case "noscript":
658 state = STATE_DEFAULT
659 writeEndTag = false
660 }
661 // skip noscript tags - only the tag, not the content, because javascript is sanitized
662 if writeEndTag {
[9]663 fmt.Fprintf(out, "</%s>", tag)
[1]664 }
665
666 case html.TextToken:
667 switch state {
668 case STATE_DEFAULT:
[9]669 fmt.Fprintf(out, "%s", decoder.Raw())
[1]670 case STATE_IN_STYLE:
[9]671 sanitizeCSS(rc, out, decoder.Raw())
[1]672 case STATE_IN_NOSCRIPT:
[9]673 sanitizeHTML(rc, out, decoder.Raw())
[1]674 }
675
[62]676 case html.CommentToken:
677 // ignore comment. TODO : parse IE conditional comment
678
679 case html.DoctypeToken:
[9]680 out.Write(decoder.Raw())
[1]681 }
682 } else {
683 switch token {
684 case html.StartTagToken:
685 tag, _ := decoder.TagName()
686 if inArray(tag, UNSAFE_ELEMENTS) {
687 unsafeElements = append(unsafeElements, tag)
688 }
689
690 case html.EndTagToken:
691 tag, _ := decoder.TagName()
692 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
693 unsafeElements = unsafeElements[:len(unsafeElements)-1]
694 }
695 }
696 }
697 }
698}
699
[13]700func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
701 exclude := false
702 for _, attr := range attrs {
703 attrName := attr[0]
704 attrValue := attr[1]
705 if bytes.Equal(attrName, []byte("rel")) {
[46]706 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]707 exclude = true
708 break
709 }
710 }
711 if bytes.Equal(attrName, []byte("as")) {
712 if bytes.Equal(attrValue, []byte("script")) {
713 exclude = true
714 break
715 }
716 }
717 }
718
719 if !exclude {
720 out.Write([]byte("<link"))
721 for _, attr := range attrs {
[21]722 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]723 }
724 out.Write([]byte(">"))
725 }
726}
727
[45]728func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]729 var http_equiv []byte
730 var content []byte
731
732 for _, attr := range attrs {
733 attrName := attr[0]
734 attrValue := attr[1]
735 if bytes.Equal(attrName, []byte("http-equiv")) {
736 http_equiv = bytes.ToLower(attrValue)
[46]737 // exclude some <meta http-equiv="..." ..>
738 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
739 return
740 }
[1]741 }
742 if bytes.Equal(attrName, []byte("content")) {
743 content = attrValue
744 }
[45]745 if bytes.Equal(attrName, []byte("charset")) {
746 // exclude <meta charset="...">
747 return
748 }
[1]749 }
750
[45]751 out.Write([]byte("<meta"))
[14]752 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
753 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
754 contentUrl := content[urlIndex+4:]
[36]755 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]756 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]757 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]758 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]759 }
760 }
761 // output proxify result
[60]762 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]763 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]764 }
765 } else {
[46]766 if len(http_equiv) > 0 {
767 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
768 }
[9]769 sanitizeAttrs(rc, out, attrs)
[1]770 }
[45]771 out.Write([]byte(">"))
[1]772}
773
[9]774func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]775 for _, attr := range attrs {
[21]776 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]777 }
778}
779
[21]780func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]781 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]782 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]783 return
784 }
785 switch string(attrName) {
786 case "src", "href", "action":
[60]787 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]788 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]789 } else if DEBUG {
[36]790 log.Println("cannot proxify uri:", string(attrValue))
[1]791 }
792 case "style":
[21]793 cssAttr := bytes.NewBuffer(nil)
794 sanitizeCSS(rc, cssAttr, attrValue)
795 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]796 }
797}
798
// mergeURIs resolves u2 against u1 and returns the result. A nil u2 yields u1
// itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
805
// sanitizeURI normalizes the start and end of uri and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are removed from the end of the
// URI and from the part that precedes the first ':', '/', '?', '\' or '#'.
// When that part ends with ':', it is the scheme: it is lower-cased, written
// back into uri in place, and returned (colon included) as the second result.
// Otherwise the second result is "" and the URI is returned from its first
// significant byte on. Apart from the scheme string nothing is copied: the
// returned slice shares memory with uri.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	lowered := make([]byte, 0, 10) // significant bytes seen so far, lower-cased
	start := -1                    // index of the first significant byte
	colon := -1                    // index of the ':' that ends the scheme

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// control character or space: not part of the scheme
			continue
		}
		if 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		lowered = append(lowered, ch)
		if start < 0 {
			start = idx
		}

		switch ch {
		case ':':
			// end of the scheme
			colon = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colon < 0 {
		if start < 0 {
			start = 0
		}
		return uri[start:], ""
	}

	// Overwrite the bytes just before the rest of the URI with the cleaned
	// scheme, so that scheme and remainder are contiguous.
	from := colon - len(lowered) + 1
	copy(uri[from:], lowered)
	return uri[from:], string(lowered)
}
857
858func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
859 // sanitize URI
860 uri, scheme := sanitizeURI(uri)
861
[28]862 // remove javascript protocol
[60]863 if scheme == "javascript:" {
[28]864 return "", nil
865 }
[57]866
[1]867 // TODO check malicious data: - e.g. data:script
[60]868 if scheme == "data:" {
[61]869 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
870 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
871 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
872 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
873 bytes.HasPrefix(uri, []byte("data:image/webp")) {
874 // should be safe
875 return string(uri), nil
876 } else {
877 // unsafe data
878 return "", nil
879 }
[1]880 }
881
[57]882 // parse the uri
[60]883 u, err := url.Parse(string(uri))
[1]884 if err != nil {
885 return "", err
886 }
[57]887
888 // get the fragment (with the prefix "#")
889 fragment := ""
890 if len(u.Fragment) > 0 {
891 fragment = "#" + u.Fragment
892 }
893
894 // reset the fragment: it is not included in the mortyurl
895 u.Fragment = ""
896
897 // merge the URI with the document URI
[28]898 u = mergeURIs(rc.BaseURL, u)
[1]899
[57]900 // simple internal link ?
901 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
902 if u.Scheme == rc.BaseURL.Scheme &&
903 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
904 u.Host == rc.BaseURL.Host &&
905 u.Path == rc.BaseURL.Path &&
906 u.RawQuery == rc.BaseURL.RawQuery {
907 // the fragment is the only difference between the document URI and the uri parameter
908 return fragment, nil
909 }
910
911 // return full URI and fragment (if not empty)
[60]912 morty_uri := u.String()
[1]913
914 if rc.Key == nil {
[60]915 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]916 }
[60]917 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]918}
919
// inArray reports whether a contains an element with exactly the bytes of b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if string(a[i]) == string(b) {
			return true
		}
	}
	return false
}
928
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
934
935func verifyRequestURI(uri, hashMsg, key []byte) bool {
936 h := make([]byte, hex.DecodedLen(len(hashMsg)))
937 _, err := hex.Decode(h, hashMsg)
938 if err != nil {
[97]939 if DEBUG {
940 log.Println("hmac error:", err)
941 }
[1]942 return false
943 }
944 mac := hmac.New(sha256.New, key)
945 mac.Write(uri)
946 return hmac.Equal(h, mac.Sum(nil))
947}
948
[69]949func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
950 ctx.SetContentType("text/html")
951 ctx.SetStatusCode(403)
952 ctx.Write([]byte(MORTY_HTML_PAGE_START))
953 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
954 ctx.Write([]byte("<p>Following</p><p><a href=\""))
955 ctx.Write([]byte(html.EscapeString(uri.String())))
956 ctx.Write([]byte("\" rel=\"noreferrer\">"))
957 ctx.Write([]byte(html.EscapeString(uri.String())))
958 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
959 ctx.Write([]byte(MORTY_HTML_PAGE_END))
960}
961
[35]962func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]963 ctx.SetContentType("text/html; charset=UTF-8")
[35]964 ctx.SetStatusCode(statusCode)
[69]965 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]966 if err != nil {
[97]967 if DEBUG {
968 log.Println("error:", err)
969 }
[11]970 ctx.Write([]byte("<h2>Error: "))
971 ctx.Write([]byte(html.EscapeString(err.Error())))
972 ctx.Write([]byte("</h2>"))
973 }
[1]974 if p.Key == nil {
975 ctx.Write([]byte(`
[36]976 <form action="post">
977 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
978 <input type="submit" value="go" />
979 </form>`))
[11]980 } else {
981 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]982 }
[69]983 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]984}
985
986func main() {
[78]987 default_listen_addr := os.Getenv("MORTY_ADDRESS")
988 if default_listen_addr == "" {
989 default_listen_addr = "127.0.0.1:3000"
990 }
991 default_key := os.Getenv("MORTY_KEY")
992 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]993 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]994 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]995 version := flag.Bool("version", false, "Show version")
[4]996 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]997 flag.Parse()
998
[74]999 if *version {
1000 fmt.Println(VERSION)
1001 return
1002 }
1003
[24]1004 if *ipv6 {
1005 CLIENT.Dial = fasthttp.DialDualStack
1006 }
1007
[4]1008 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1009
1010 if *key != "" {
[92]1011 var err error
1012 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1013 if err != nil {
1014 log.Fatal("Error parsing -key", err.Error())
1015 os.Exit(1)
[92]1016 }
[1]1017 }
1018
1019 log.Println("listening on", *listen)
1020
1021 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1022 log.Fatal("Error in ListenAndServe:", err)
1023 }
1024}
Note: See TracBrowser for help on using the repository browser.