source: code/trunk/morty.go@ 96

Last change on this file since 96 was 96, checked in by alex, 6 years ago

[mod] follow HTTP redirect (only GET HTTP method)

close #48

File size: 28.2 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[78]16 "os"
[68]17 "path/filepath"
[1]18 "regexp"
19 "strings"
[4]20 "time"
[60]21 "unicode/utf8"
[1]22
23 "github.com/valyala/fasthttp"
24 "golang.org/x/net/html"
[45]25 "golang.org/x/net/html/charset"
26 "golang.org/x/text/encoding"
[68]27
28 "github.com/asciimoo/morty/contenttype"
[1]29)
30
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is inside a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is inside a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
36
const (
	// VERSION is the string printed by the -version command line flag.
	VERSION = "v0.2.0"

	// MAX_REDIRECT_COUNT is the number of upstream redirects ProcessUri
	// follows for a single GET request before it reports an error.
	MAX_REDIRECT_COUNT = 5
)
40
[1]41var CLIENT *fasthttp.Client = &fasthttp.Client{
42 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
43}
44
// CSS_URL_REGEXP matches CSS "url(...)" references. Submatch 1 and 3 are the
// optional opening and closing quotes, submatch 2 is the URL itself (the part
// sanitizeCSS rewrites).
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
[1]46
// ALLOWED_CONTENTTYPE_FILTER accepts the content types ProcessUri serves
// without forcing a download: HTML and CSS (which are sanitized), images and
// fonts. Anything it rejects is checked against
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER next.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
70
71var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
72 // texts
73 contenttype.NewFilterEquals("text", "csv", ""),
74 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
75 contenttype.NewFilterEquals("text", "plain", ""),
76 // API
77 contenttype.NewFilterEquals("application", "json", ""),
78 // Documents
79 contenttype.NewFilterEquals("application", "x-latex", ""),
80 contenttype.NewFilterEquals("application", "pdf", ""),
81 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
82 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
85 // Compressed archives
86 contenttype.NewFilterEquals("application", "zip", ""),
87 contenttype.NewFilterEquals("application", "gzip", ""),
88 contenttype.NewFilterEquals("application", "x-compressed", ""),
89 contenttype.NewFilterEquals("application", "x-gtar", ""),
90 contenttype.NewFilterEquals("application", "x-compress", ""),
91 // Generic binary
92 contenttype.NewFilterEquals("application", "octet-stream", ""),
93})
94
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters passed
// to contentType.FilterParameters in ProcessUri before the header is sent.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
98
// UNSAFE_ELEMENTS lists the tag names sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
108
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the output
// with their HTML-escaped value, without any rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
145
// SELF_CLOSING_ELEMENTS lists elements that have no end tag. sanitizeHTML
// consults it so that an unsafe element from this list is dropped without
// waiting for a closing tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
163
// LINK_REL_SAFE_VALUES lists the <link rel="..."> values sanitizeLinkTag
// accepts; the comparison is an exact byte match on the whole attribute value.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
185
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased <meta http-equiv="...">
// values sanitizeMetaTag keeps; a meta tag with any other http-equiv value is
// dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
194
// Proxy holds the per-instance configuration of the HTTP handler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" parameter.
	// When nil, RequestHandler skips the verification.
	Key []byte

	// RequestTimeout is the timeout passed to CLIENT.DoTimeout for each
	// upstream request.
	RequestTimeout time.Duration
}
199
// RequestConfig carries the state needed to rewrite the URLs of one proxied
// document.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil disables signing.
	Key []byte

	// BaseURL is the URL relative references are resolved against.
	// sanitizeHTML updates it when the document contains a <base href>.
	BaseURL *url.URL
}
204
// HTML_FORM_EXTENSION is the fmt template sanitizeHTML writes after every
// opening <form> tag. The two verbs receive the form target URL and its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" />` +
	`<input type="hidden" name="mortyhash" value="%s" />`
[1]206
// HTML_BODY_EXTENSION is the fmt template sanitizeHTML writes before every
// closing </body> tag: a dismissible banner linking to the original page.
// Its single %s verb receives the document URL; literal percent signs in the
// CSS are doubled because the string goes through fmt.Fprintf.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
	<p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
222
// HTML_HEAD_CONTENT_TYPE is written by sanitizeHTML right after every opening
// <head> tag: it declares the UTF-8 charset, the IE compatibility mode and a
// no-referrer policy.
var HTML_HEAD_CONTENT_TYPE = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n" +
	"<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n" +
	"<meta name=\"referrer\" content=\"no-referrer\">\n"
[45]227
// MORTY_HTML_PAGE_START is the opening part of the pages Morty generates
// itself (main page, error pages, exit page). It is written verbatim, so the
// percent signs in the CSS are not doubled.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
250
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer of the pages Morty generates itself.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
260
// FAVICON_BYTES is the PNG image served for /favicon.ico. It is decoded once
// by init from the base64 literal below.
var FAVICON_BYTES []byte

func init() {
	// The literal must carry its trailing "=" padding: base64.StdEncoding
	// rejects an unpadded final quantum and would drop the last two bytes of
	// the image (the end of the IEND chunk checksum).
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII="

	decoded, err := base64.StdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// The input is a compile-time constant, so a failure is a programming error.
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
268
[1]269func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]270
271 if appRequestHandler(ctx) {
272 return
273 }
274
[1]275 requestHash := popRequestParam(ctx, []byte("mortyhash"))
276
277 requestURI := popRequestParam(ctx, []byte("mortyurl"))
278
279 if requestURI == nil {
[35]280 p.serveMainPage(ctx, 200, nil)
[1]281 return
282 }
283
284 if p.Key != nil {
285 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]286 // HTTP status code 403 : Forbidden
287 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]288 return
289 }
290 }
291
[96]292 p.ProcessUri(ctx, string(requestURI), 0)
293}
[1]294
[96]295func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURI string, redirectCount int) {
296 parsedURI, err := url.Parse(requestURI)
297
[11]298 if err != nil {
[35]299 // HTTP status code 500 : Internal Server Error
300 p.serveMainPage(ctx, 500, err)
[1]301 return
302 }
303
[69]304 // Serve an intermediate page for protocols other than HTTP(S)
305 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
306 p.serveExitMortyPage(ctx, parsedURI)
307 return
308 }
309
[1]310 req := fasthttp.AcquireRequest()
311 defer fasthttp.ReleaseRequest(req)
[12]312 req.SetConnectionClose()
[1]313
[47]314 requestURIStr := string(requestURI)
[1]315
[47]316 log.Println("getting", requestURIStr)
[1]317
[47]318 req.SetRequestURI(requestURIStr)
[62]319 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]320
321 resp := fasthttp.AcquireResponse()
322 defer fasthttp.ReleaseResponse(resp)
323
324 req.Header.SetMethodBytes(ctx.Method())
325 if ctx.IsPost() || ctx.IsPut() {
326 req.SetBody(ctx.PostBody())
327 }
328
[11]329 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
330
331 if err != nil {
[35]332 if err == fasthttp.ErrTimeout {
333 // HTTP status code 504 : Gateway Time-Out
334 p.serveMainPage(ctx, 504, err)
335 } else {
336 // HTTP status code 500 : Internal Server Error
337 p.serveMainPage(ctx, 500, err)
338 }
[1]339 return
340 }
341
342 if resp.StatusCode() != 200 {
343 switch resp.StatusCode() {
[7]344 case 301, 302, 303, 307, 308:
[1]345 loc := resp.Header.Peek("Location")
346 if loc != nil {
[96]347 log.Println("redirect to", string(loc))
348 if ctx.IsGet() {
349 // GET method: Morty follows the redirect
350 if redirectCount < MAX_REDIRECT_COUNT {
351 p.ProcessUri(ctx, string(loc), redirectCount+1)
352 } else {
353 p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
354 }
[1]355 return
[96]356 } else {
357 // Other HTTP methods: Morty does NOT follow the redirect
358 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
359 url, err := rc.ProxifyURI(loc)
360 if err == nil {
361 ctx.SetStatusCode(resp.StatusCode())
362 ctx.Response.Header.Add("Location", url)
363 return
364 }
[1]365 }
366 }
367 }
[47]368 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]369 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]370 return
371 }
372
[68]373 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]374
[68]375 if contentTypeBytes == nil {
[35]376 // HTTP status code 503 : Service Unavailable
377 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]378 return
379 }
380
[68]381 contentTypeString := string(contentTypeBytes)
382
383 // decode Content-Type header
384 contentType, error := contenttype.ParseContentType(contentTypeString)
385 if error != nil {
386 // HTTP status code 503 : Service Unavailable
387 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]388 return
389 }
390
[68]391 // content-disposition
392 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]393
[68]394 // check content type
395 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
396 // it is not a usual content type
397 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
398 // force attachment for allowed content type
399 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
400 } else {
401 // deny access to forbidden content type
402 // HTTP status code 403 : Forbidden
403 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
404 return
405 }
406 }
407
408 // HACK : replace */xhtml by text/html
409 if contentType.SubType == "xhtml" {
410 contentType.TopLevelType = "text"
411 contentType.SubType = "html"
412 contentType.Suffix = ""
413 }
414
415 // conversion to UTF-8
[1]416 var responseBody []byte
417
[68]418 if contentType.TopLevelType == "text" {
419 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]420 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
421 responseBody, err = e.NewDecoder().Bytes(resp.Body())
422 if err != nil {
423 // HTTP status code 503 : Service Unavailable
424 p.serveMainPage(ctx, 503, err)
425 return
426 }
427 } else {
428 responseBody = resp.Body()
[1]429 }
[68]430 // update the charset or specify it
431 contentType.Parameters["charset"] = "UTF-8"
[1]432 } else {
433 responseBody = resp.Body()
434 }
435
[68]436 //
437 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]438
[68]439 // set the content type
440 ctx.SetContentType(contentType.String())
441
442 // output according to MIME type
[1]443 switch {
[68]444 case contentType.SubType == "css" && contentType.Suffix == "":
[23]445 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]446 case contentType.SubType == "html" && contentType.Suffix == "":
[23]447 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]448 default:
[68]449 if contentDispositionBytes != nil {
450 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]451 }
[1]452 ctx.Write(responseBody)
453 }
454}
455
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the header value to start from (may be nil); its
// parameters are kept when it parses. When no "filename" parameter is
// present, the last element of url.Path is used, unless the path has no
// usable last element. The result is never empty: if the parameters cannot be
// formatted, the bare "attachment" is returned so that the download is still
// forced.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// On a parse error the parameters are discarded and we start from scratch.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// filepath.Base yields "." for an empty path and the separator for a
		// path made of separators only: neither is a file name.
		if name := filepath.Base(url.Path); name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	// FormatMediaType returns "" when it cannot encode an argument.
	if header := mime.FormatMediaType("attachment", params); header != "" {
		return []byte(header)
	}
	return []byte("attachment")
}
478
[10]479func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]480 // serve robots.txt
[10]481 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
482 ctx.SetContentType("text/plain")
483 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
484 return true
485 }
[11]486
[67]487 // server favicon.ico
488 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
489 ctx.SetContentType("image/png")
490 ctx.Write(FAVICON_BYTES)
491 return true
492 }
493
[10]494 return false
495}
496
[1]497func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
498 param := ctx.QueryArgs().PeekBytes(paramName)
499
500 if param == nil {
501 param = ctx.PostArgs().PeekBytes(paramName)
502 if param != nil {
503 ctx.PostArgs().DelBytes(paramName)
504 }
505 } else {
506 ctx.QueryArgs().DelBytes(paramName)
507 }
508
509 return param
510}
511
[9]512func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]513 // TODO
514
515 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
516
517 if urlSlices == nil {
[9]518 out.Write(css)
[1]519 return
520 }
521
522 startIndex := 0
523
524 for _, s := range urlSlices {
[15]525 urlStart := s[4]
526 urlEnd := s[5]
[1]527
[60]528 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]529 out.Write(css[startIndex:urlStart])
530 out.Write([]byte(uri))
[1]531 startIndex = urlEnd
532 } else {
[36]533 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]534 }
535 }
536 if startIndex < len(css) {
[9]537 out.Write(css[startIndex:len(css)])
[1]538 }
539}
540
[9]541func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]542 r := bytes.NewReader(htmlDoc)
543 decoder := html.NewTokenizer(r)
544 decoder.AllowCDATA(true)
545
546 unsafeElements := make([][]byte, 0, 8)
547 state := STATE_DEFAULT
548 for {
549 token := decoder.Next()
550 if token == html.ErrorToken {
551 err := decoder.Err()
552 if err != io.EOF {
553 log.Println("failed to parse HTML:")
554 }
555 break
556 }
557
558 if len(unsafeElements) == 0 {
559
560 switch token {
561 case html.StartTagToken, html.SelfClosingTagToken:
562 tag, hasAttrs := decoder.TagName()
563 safe := !inArray(tag, UNSAFE_ELEMENTS)
564 if !safe {
565 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
566 var unsafeTag []byte = make([]byte, len(tag))
567 copy(unsafeTag, tag)
568 unsafeElements = append(unsafeElements, unsafeTag)
569 }
570 break
571 }
[38]572 if bytes.Equal(tag, []byte("base")) {
573 for {
574 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]575 if bytes.Equal(attrName, []byte("href")) {
576 parsedURI, err := url.Parse(string(attrValue))
577 if err == nil {
578 rc.BaseURL = parsedURI
579 }
[38]580 }
581 if !moreAttr {
582 break
583 }
584 }
585 break
586 }
[1]587 if bytes.Equal(tag, []byte("noscript")) {
588 state = STATE_IN_NOSCRIPT
589 break
590 }
591 var attrs [][][]byte
592 if hasAttrs {
593 for {
594 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]595 attrs = append(attrs, [][]byte{
596 attrName,
597 attrValue,
598 []byte(html.EscapeString(string(attrValue))),
599 })
[1]600 if !moreAttr {
601 break
602 }
603 }
[13]604 }
605 if bytes.Equal(tag, []byte("link")) {
606 sanitizeLinkTag(rc, out, attrs)
607 break
608 }
609
[45]610 if bytes.Equal(tag, []byte("meta")) {
611 sanitizeMetaTag(rc, out, attrs)
612 break
613 }
614
[13]615 fmt.Fprintf(out, "<%s", tag)
616
617 if hasAttrs {
[45]618 sanitizeAttrs(rc, out, attrs)
[1]619 }
[13]620
[1]621 if token == html.SelfClosingTagToken {
[9]622 fmt.Fprintf(out, " />")
[1]623 } else {
[9]624 fmt.Fprintf(out, ">")
[1]625 if bytes.Equal(tag, []byte("style")) {
626 state = STATE_IN_STYLE
627 }
628 }
[13]629
[45]630 if bytes.Equal(tag, []byte("head")) {
[46]631 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]632 }
633
[1]634 if bytes.Equal(tag, []byte("form")) {
635 var formURL *url.URL
636 for _, attr := range attrs {
637 if bytes.Equal(attr[0], []byte("action")) {
638 formURL, _ = url.Parse(string(attr[1]))
[28]639 formURL = mergeURIs(rc.BaseURL, formURL)
[1]640 break
641 }
642 }
643 if formURL == nil {
[23]644 formURL = rc.BaseURL
[1]645 }
[2]646 urlStr := formURL.String()
647 var key string
648 if rc.Key != nil {
649 key = hash(urlStr, rc.Key)
650 }
[9]651 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]652
653 }
654
655 case html.EndTagToken:
656 tag, _ := decoder.TagName()
657 writeEndTag := true
658 switch string(tag) {
659 case "body":
[23]660 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]661 case "style":
662 state = STATE_DEFAULT
663 case "noscript":
664 state = STATE_DEFAULT
665 writeEndTag = false
666 }
667 // skip noscript tags - only the tag, not the content, because javascript is sanitized
668 if writeEndTag {
[9]669 fmt.Fprintf(out, "</%s>", tag)
[1]670 }
671
672 case html.TextToken:
673 switch state {
674 case STATE_DEFAULT:
[9]675 fmt.Fprintf(out, "%s", decoder.Raw())
[1]676 case STATE_IN_STYLE:
[9]677 sanitizeCSS(rc, out, decoder.Raw())
[1]678 case STATE_IN_NOSCRIPT:
[9]679 sanitizeHTML(rc, out, decoder.Raw())
[1]680 }
681
[62]682 case html.CommentToken:
683 // ignore comment. TODO : parse IE conditional comment
684
685 case html.DoctypeToken:
[9]686 out.Write(decoder.Raw())
[1]687 }
688 } else {
689 switch token {
690 case html.StartTagToken:
691 tag, _ := decoder.TagName()
692 if inArray(tag, UNSAFE_ELEMENTS) {
693 unsafeElements = append(unsafeElements, tag)
694 }
695
696 case html.EndTagToken:
697 tag, _ := decoder.TagName()
698 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
699 unsafeElements = unsafeElements[:len(unsafeElements)-1]
700 }
701 }
702 }
703 }
704}
705
[13]706func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
707 exclude := false
708 for _, attr := range attrs {
709 attrName := attr[0]
710 attrValue := attr[1]
711 if bytes.Equal(attrName, []byte("rel")) {
[46]712 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]713 exclude = true
714 break
715 }
716 }
717 if bytes.Equal(attrName, []byte("as")) {
718 if bytes.Equal(attrValue, []byte("script")) {
719 exclude = true
720 break
721 }
722 }
723 }
724
725 if !exclude {
726 out.Write([]byte("<link"))
727 for _, attr := range attrs {
[21]728 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]729 }
730 out.Write([]byte(">"))
731 }
732}
733
[45]734func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]735 var http_equiv []byte
736 var content []byte
737
738 for _, attr := range attrs {
739 attrName := attr[0]
740 attrValue := attr[1]
741 if bytes.Equal(attrName, []byte("http-equiv")) {
742 http_equiv = bytes.ToLower(attrValue)
[46]743 // exclude some <meta http-equiv="..." ..>
744 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
745 return
746 }
[1]747 }
748 if bytes.Equal(attrName, []byte("content")) {
749 content = attrValue
750 }
[45]751 if bytes.Equal(attrName, []byte("charset")) {
752 // exclude <meta charset="...">
753 return
754 }
[1]755 }
756
[45]757 out.Write([]byte("<meta"))
[14]758 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
759 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
760 contentUrl := content[urlIndex+4:]
[36]761 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]762 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]763 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]764 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]765 }
766 }
767 // output proxify result
[60]768 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]769 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]770 }
771 } else {
[46]772 if len(http_equiv) > 0 {
773 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
774 }
[9]775 sanitizeAttrs(rc, out, attrs)
[1]776 }
[45]777 out.Write([]byte(">"))
[1]778}
779
[9]780func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]781 for _, attr := range attrs {
[21]782 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]783 }
784}
785
[21]786func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]787 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]788 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]789 return
790 }
791 switch string(attrName) {
792 case "src", "href", "action":
[60]793 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]794 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]795 } else {
[36]796 log.Println("cannot proxify uri:", string(attrValue))
[1]797 }
798 case "style":
[21]799 cssAttr := bytes.NewBuffer(nil)
800 sanitizeCSS(rc, cssAttr, attrValue)
801 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]802 }
803}
804
// mergeURIs resolves u2 as a reference relative to u1. When u2 is nil, u1 is
// returned unchanged.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
811
// sanitizeURI cleans up uri and extracts its scheme.
//
// Trailing bytes up to and including 32 (space) are trimmed. The URI is then
// scanned up to the first ':', '/', '?', '\\' or '#': bytes up to 32 are
// skipped and ASCII upper case letters are lowered. If the scan stops on ':',
// the cleaned scheme (colon included, e.g. "http:") is returned as second
// value and the returned URI starts with it; otherwise the second value is ""
// and the URI is returned from its first byte above 32.
//
// To avoid allocating a new URI, the cleaned scheme is written back into the
// backing array of uri: the caller's slice is modified.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	start := 0       // index of the first byte above 32
	started := false // whether start has been set
	colon := -1      // index of the ':' ending the scheme, -1 if none
	cleaned := bytes.NewBuffer(make([]byte, 0, 10))

scan:
	for pos, b := range uri {
		// ignore special characters and space
		if b <= 32 {
			continue
		}

		// lower case ASCII letters
		if b < utf8.RuneSelf && b >= 'A' && b <= 'Z' {
			b += 'a' - 'A'
		}
		cleaned.WriteByte(b)

		if !started {
			start, started = pos, true
		}

		switch b {
		case ':':
			// end of the scheme
			colon = pos
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colon == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// scheme found: store the cleaned scheme right before the rest of the
	// URI, so that it ends on the colon
	schemeStart := colon - cleaned.Len() + 1
	copy(uri[schemeStart:], cleaned.Bytes())
	return uri[schemeStart:], cleaned.String()
}
863
864func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
865 // sanitize URI
866 uri, scheme := sanitizeURI(uri)
867
[28]868 // remove javascript protocol
[60]869 if scheme == "javascript:" {
[28]870 return "", nil
871 }
[57]872
[1]873 // TODO check malicious data: - e.g. data:script
[60]874 if scheme == "data:" {
[61]875 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
876 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
877 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
878 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
879 bytes.HasPrefix(uri, []byte("data:image/webp")) {
880 // should be safe
881 return string(uri), nil
882 } else {
883 // unsafe data
884 return "", nil
885 }
[1]886 }
887
[57]888 // parse the uri
[60]889 u, err := url.Parse(string(uri))
[1]890 if err != nil {
891 return "", err
892 }
[57]893
894 // get the fragment (with the prefix "#")
895 fragment := ""
896 if len(u.Fragment) > 0 {
897 fragment = "#" + u.Fragment
898 }
899
900 // reset the fragment: it is not included in the mortyurl
901 u.Fragment = ""
902
903 // merge the URI with the document URI
[28]904 u = mergeURIs(rc.BaseURL, u)
[1]905
[57]906 // simple internal link ?
907 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
908 if u.Scheme == rc.BaseURL.Scheme &&
909 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
910 u.Host == rc.BaseURL.Host &&
911 u.Path == rc.BaseURL.Path &&
912 u.RawQuery == rc.BaseURL.RawQuery {
913 // the fragment is the only difference between the document URI and the uri parameter
914 return fragment, nil
915 }
916
917 // return full URI and fragment (if not empty)
[60]918 morty_uri := u.String()
[1]919
920 if rc.Key == nil {
[60]921 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]922 }
[60]923 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]924}
925
// inArray reports whether a contains an element with the same bytes as b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
934
// hash returns the HMAC-SHA256 of msg keyed with key, as a lower case
// hexadecimal string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
940
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri keyed with key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	return hmac.Equal(given, expected.Sum(nil))
}
952
[69]953func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
954 ctx.SetContentType("text/html")
955 ctx.SetStatusCode(403)
956 ctx.Write([]byte(MORTY_HTML_PAGE_START))
957 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
958 ctx.Write([]byte("<p>Following</p><p><a href=\""))
959 ctx.Write([]byte(html.EscapeString(uri.String())))
960 ctx.Write([]byte("\" rel=\"noreferrer\">"))
961 ctx.Write([]byte(html.EscapeString(uri.String())))
962 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
963 ctx.Write([]byte(MORTY_HTML_PAGE_END))
964}
965
[35]966func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]967 ctx.SetContentType("text/html; charset=UTF-8")
[35]968 ctx.SetStatusCode(statusCode)
[69]969 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]970 if err != nil {
971 log.Println("error:", err)
972 ctx.Write([]byte("<h2>Error: "))
973 ctx.Write([]byte(html.EscapeString(err.Error())))
974 ctx.Write([]byte("</h2>"))
975 }
[1]976 if p.Key == nil {
977 ctx.Write([]byte(`
[36]978 <form action="post">
979 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
980 <input type="submit" value="go" />
981 </form>`))
[11]982 } else {
983 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]984 }
[69]985 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]986}
987
988func main() {
[78]989 default_listen_addr := os.Getenv("MORTY_ADDRESS")
990 if default_listen_addr == "" {
991 default_listen_addr = "127.0.0.1:3000"
992 }
993 default_key := os.Getenv("MORTY_KEY")
994 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]995 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]996 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]997 version := flag.Bool("version", false, "Show version")
[4]998 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]999 flag.Parse()
1000
[74]1001 if *version {
1002 fmt.Println(VERSION)
1003 return
1004 }
1005
[24]1006 if *ipv6 {
1007 CLIENT.Dial = fasthttp.DialDualStack
1008 }
1009
[4]1010 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1011
1012 if *key != "" {
[92]1013 var err error
1014 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1015 if err != nil {
1016 log.Fatal("Error parsing -key", err.Error())
1017 os.Exit(1)
[92]1018 }
[1]1019 }
1020
1021 log.Println("listening on", *listen)
1022
1023 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1024 log.Fatal("Error in ListenAndServe:", err)
1025 }
1026}
Note: See TracBrowser for help on using the repository browser.