source: code/trunk/morty.go@ 125

Last change on this file since 125 was 125, checked in by asciimoo, 5 years ago

[fix] concatenate url params to existing ones

File size: 29.5 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
[121]12 "html/template"
[1]13 "io"
14 "log"
[68]15 "mime"
[1]16 "net/url"
[78]17 "os"
[68]18 "path/filepath"
[1]19 "regexp"
20 "strings"
[4]21 "time"
[60]22 "unicode/utf8"
[1]23
24 "github.com/valyala/fasthttp"
[109]25 "github.com/valyala/fasthttp/fasthttpproxy"
[1]26 "golang.org/x/net/html"
[45]27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
[68]29
30 "github.com/asciimoo/morty/contenttype"
[1]31)
32
// Parser states tracked by sanitizeHTML; they select how a text token is
// written out.
const (
	STATE_DEFAULT     int = iota // regular document text, copied as-is
	STATE_IN_STYLE               // text inside <style>, passed through sanitizeCSS
	STATE_IN_NOSCRIPT            // text inside <noscript>, re-sanitized as HTML
)
38
[77]39const VERSION = "v0.2.0"
[74]40
[100]41var DEBUG = os.Getenv("DEBUG") != "false"
[96]42
[1]43var CLIENT *fasthttp.Client = &fasthttp.Client{
44 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
[113]45 ReadBufferSize: 16 * 1024, // 16K
[1]46}
47
[27]48var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]49
[68]50var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
51 // html
52 contenttype.NewFilterEquals("text", "html", ""),
53 contenttype.NewFilterEquals("application", "xhtml", "xml"),
54 // css
55 contenttype.NewFilterEquals("text", "css", ""),
56 // images
57 contenttype.NewFilterEquals("image", "gif", ""),
58 contenttype.NewFilterEquals("image", "png", ""),
59 contenttype.NewFilterEquals("image", "jpeg", ""),
60 contenttype.NewFilterEquals("image", "pjpeg", ""),
61 contenttype.NewFilterEquals("image", "webp", ""),
62 contenttype.NewFilterEquals("image", "tiff", ""),
63 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
64 contenttype.NewFilterEquals("image", "bmp", ""),
65 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]66 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]67 // fonts
68 contenttype.NewFilterEquals("application", "font-otf", ""),
69 contenttype.NewFilterEquals("application", "font-ttf", ""),
70 contenttype.NewFilterEquals("application", "font-woff", ""),
71 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
72})
73
74var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
75 // texts
76 contenttype.NewFilterEquals("text", "csv", ""),
[103]77 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
[68]78 contenttype.NewFilterEquals("text", "plain", ""),
79 // API
80 contenttype.NewFilterEquals("application", "json", ""),
81 // Documents
82 contenttype.NewFilterEquals("application", "x-latex", ""),
83 contenttype.NewFilterEquals("application", "pdf", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
88 // Compressed archives
89 contenttype.NewFilterEquals("application", "zip", ""),
90 contenttype.NewFilterEquals("application", "gzip", ""),
91 contenttype.NewFilterEquals("application", "x-compressed", ""),
92 contenttype.NewFilterEquals("application", "x-gtar", ""),
93 contenttype.NewFilterEquals("application", "x-compress", ""),
94 // Generic binary
95 contenttype.NewFilterEquals("application", "octet-stream", ""),
96})
97
98var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
99 "charset": true,
100}
101
var (
	// UNSAFE_ELEMENTS are dropped by sanitizeHTML together with everything
	// nested inside them.
	UNSAFE_ELEMENTS = [][]byte{
		[]byte("applet"),
		[]byte("canvas"),
		[]byte("embed"),
		//[]byte("iframe"),
		[]byte("math"),
		[]byte("script"),
		[]byte("svg"),
	}

	// SAFE_ATTRIBUTES are copied to the output with an HTML-escaped value.
	SAFE_ATTRIBUTES = [][]byte{
		[]byte("abbr"),
		[]byte("accesskey"),
		[]byte("align"),
		[]byte("alt"),
		[]byte("as"),
		[]byte("autocomplete"),
		[]byte("charset"),
		[]byte("checked"),
		[]byte("class"),
		[]byte("content"),
		[]byte("contenteditable"),
		[]byte("contextmenu"),
		[]byte("dir"),
		[]byte("for"),
		[]byte("height"),
		[]byte("hidden"),
		[]byte("hreflang"),
		[]byte("id"),
		[]byte("lang"),
		[]byte("media"),
		[]byte("method"),
		[]byte("name"),
		[]byte("nowrap"),
		[]byte("placeholder"),
		[]byte("property"),
		[]byte("rel"),
		[]byte("spellcheck"),
		[]byte("tabindex"),
		[]byte("target"),
		[]byte("title"),
		[]byte("translate"),
		[]byte("type"),
		[]byte("value"),
		[]byte("width"),
	}

	// LINK_REL_SAFE_VALUES are the <link rel="..."> values that are kept;
	// a <link> with any other rel value is removed.
	LINK_REL_SAFE_VALUES = [][]byte{
		[]byte("alternate"),
		[]byte("archives"),
		[]byte("author"),
		[]byte("copyright"),
		[]byte("first"),
		[]byte("help"),
		[]byte("icon"),
		[]byte("index"),
		[]byte("last"),
		[]byte("license"),
		[]byte("manifest"),
		[]byte("next"),
		[]byte("pingback"),
		[]byte("prev"),
		[]byte("publisher"),
		[]byte("search"),
		[]byte("shortcut icon"),
		[]byte("stylesheet"),
		[]byte("up"),
	}

	// LINK_HTTP_EQUIV_SAFE_VALUES are the <meta http-equiv="..."> values
	// that are kept; a <meta> with any other http-equiv value is removed.
	LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
		// X-UA-Compatible is added automatically, so it can be skipped
		[]byte("date"),
		[]byte("last-modified"),
		[]byte("refresh"), // URL rewrite
		// []byte("location"), TODO URL rewrite
		[]byte("content-language"),
	}
)
179
[1]180type Proxy struct {
[4]181 Key []byte
182 RequestTimeout time.Duration
[1]183}
184
185type RequestConfig struct {
[124]186 Key []byte
187 BaseURL *url.URL
188 BodyInjected bool
[1]189}
190
[121]191type HTMLBodyExtParam struct {
192 BaseURL string
193 HasMortyKey bool
194}
[1]195
[121]196type HTMLFormExtParam struct {
[122]197 BaseURL string
198 MortyHash string
[121]199}
[1]200
var (
	// HTML_FORM_EXTENSION renders the hidden inputs appended to every <form>.
	// It is parsed in init.
	HTML_FORM_EXTENSION *template.Template

	// HTML_BODY_EXTENSION renders the Morty header bar injected into proxied
	// pages. It is parsed in init.
	HTML_BODY_EXTENSION *template.Template

	// HTML_HEAD_CONTENT_TYPE is written right after the opening <head> tag of
	// every sanitized document.
	HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
)
[45]207
// MORTY_HTML_PAGE_START is the opening part of Morty's own pages (main page,
// error page, exit page): doctype, head, stylesheet and page title.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer.
var MORTY_HTML_PAGE_END string = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
240
[67]241var FAVICON_BYTES []byte
242
243func init() {
244 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
245
246 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
[121]247 var err error
248 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
[123]249 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
[121]250 if err != nil {
251 panic(err)
252 }
253 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
254<input type="checkbox" id="mortytoggle" autocomplete="off" />
255<div id="mortyheader">
256 <form method="get">
257 <label for="mortytoggle">hide</label>
258 <span><a href="/">Morty Proxy</a></span>
259 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
260 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
261 </form>
262</div>
263<style>
264body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
265#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
266#mortyheader p { padding: 0 0 0.7em 0; display: block; }
267#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
268#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
269#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
270input[type=checkbox]#mortytoggle { display: none; }
271input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
272#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
273</style>
274`)
275 if err != nil {
276 panic(err)
277 }
[67]278}
279
[1]280func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]281
282 if appRequestHandler(ctx) {
283 return
284 }
285
[1]286 requestHash := popRequestParam(ctx, []byte("mortyhash"))
287
288 requestURI := popRequestParam(ctx, []byte("mortyurl"))
289
290 if requestURI == nil {
[35]291 p.serveMainPage(ctx, 200, nil)
[1]292 return
293 }
294
295 if p.Key != nil {
296 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]297 // HTTP status code 403 : Forbidden
298 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]299 return
300 }
301 }
302
[118]303 requestURIQuery := ctx.QueryArgs().QueryString()
304 if len(requestURIQuery) > 0 {
[125]305 if bytes.ContainsRune(requestURI, '?') {
306 requestURI = append(requestURI, '&')
307 } else {
308 requestURI = append(requestURI, '?')
309 }
[118]310 requestURI = append(requestURI, requestURIQuery...)
311 }
312
[97]313 parsedURI, err := url.Parse(string(requestURI))
[1]314
[11]315 if err != nil {
[35]316 // HTTP status code 500 : Internal Server Error
317 p.serveMainPage(ctx, 500, err)
[1]318 return
319 }
320
[120]321 if parsedURI.Scheme == "" {
322 parsedURI.Scheme = "https"
323 requestURI = append([]byte("https://"), requestURI...)
324 }
325
[69]326 // Serve an intermediate page for protocols other than HTTP(S)
327 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
328 p.serveExitMortyPage(ctx, parsedURI)
329 return
330 }
331
[1]332 req := fasthttp.AcquireRequest()
333 defer fasthttp.ReleaseRequest(req)
[12]334 req.SetConnectionClose()
[1]335
[47]336 requestURIStr := string(requestURI)
[1]337
[97]338 if DEBUG {
339 log.Println("getting", requestURIStr)
340 }
[1]341
[47]342 req.SetRequestURI(requestURIStr)
[111]343 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
[1]344
345 resp := fasthttp.AcquireResponse()
346 defer fasthttp.ReleaseResponse(resp)
347
348 req.Header.SetMethodBytes(ctx.Method())
349 if ctx.IsPost() || ctx.IsPut() {
350 req.SetBody(ctx.PostBody())
351 }
352
[11]353 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
354
355 if err != nil {
[35]356 if err == fasthttp.ErrTimeout {
357 // HTTP status code 504 : Gateway Time-Out
358 p.serveMainPage(ctx, 504, err)
359 } else {
360 // HTTP status code 500 : Internal Server Error
361 p.serveMainPage(ctx, 500, err)
362 }
[1]363 return
364 }
365
366 if resp.StatusCode() != 200 {
367 switch resp.StatusCode() {
[7]368 case 301, 302, 303, 307, 308:
[1]369 loc := resp.Header.Peek("Location")
370 if loc != nil {
[97]371 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
372 url, err := rc.ProxifyURI(loc)
373 if err == nil {
374 ctx.SetStatusCode(resp.StatusCode())
375 ctx.Response.Header.Add("Location", url)
376 if DEBUG {
377 log.Println("redirect to", string(loc))
[96]378 }
[1]379 return
380 }
381 }
382 }
[47]383 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]384 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]385 return
386 }
387
[68]388 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]389
[68]390 if contentTypeBytes == nil {
[35]391 // HTTP status code 503 : Service Unavailable
392 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]393 return
394 }
395
[68]396 contentTypeString := string(contentTypeBytes)
397
398 // decode Content-Type header
399 contentType, error := contenttype.ParseContentType(contentTypeString)
400 if error != nil {
401 // HTTP status code 503 : Service Unavailable
402 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]403 return
404 }
405
[68]406 // content-disposition
407 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]408
[68]409 // check content type
410 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
411 // it is not a usual content type
412 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
413 // force attachment for allowed content type
414 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
415 } else {
416 // deny access to forbidden content type
417 // HTTP status code 403 : Forbidden
418 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
419 return
420 }
421 }
422
423 // HACK : replace */xhtml by text/html
424 if contentType.SubType == "xhtml" {
425 contentType.TopLevelType = "text"
426 contentType.SubType = "html"
427 contentType.Suffix = ""
428 }
429
430 // conversion to UTF-8
[1]431 var responseBody []byte
432
[68]433 if contentType.TopLevelType == "text" {
434 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]435 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
436 responseBody, err = e.NewDecoder().Bytes(resp.Body())
437 if err != nil {
438 // HTTP status code 503 : Service Unavailable
439 p.serveMainPage(ctx, 503, err)
440 return
441 }
442 } else {
443 responseBody = resp.Body()
[1]444 }
[68]445 // update the charset or specify it
446 contentType.Parameters["charset"] = "UTF-8"
[1]447 } else {
448 responseBody = resp.Body()
449 }
450
[68]451 //
452 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]453
[68]454 // set the content type
455 ctx.SetContentType(contentType.String())
456
457 // output according to MIME type
[1]458 switch {
[68]459 case contentType.SubType == "css" && contentType.Suffix == "":
[23]460 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]461 case contentType.SubType == "html" && contentType.Suffix == "":
[124]462 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
463 sanitizeHTML(rc, ctx, responseBody)
464 if !rc.BodyInjected {
465 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
466 if len(rc.Key) > 0 {
467 p.HasMortyKey = true
468 }
469 err := HTML_BODY_EXTENSION.Execute(ctx, p)
470 if err != nil {
471 if DEBUG {
472 fmt.Println("failed to inject body extension", err)
473 }
474 }
475 }
[1]476 default:
[68]477 if contentDispositionBytes != nil {
478 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]479 }
[1]480 ctx.Write(responseBody)
481 }
482}
483
[68]484// force content-disposition to attachment
485func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
486 var contentDispositionParams map[string]string
487
488 if contentDispositionBytes != nil {
489 var err error
490 _, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
491 if err != nil {
492 contentDispositionParams = make(map[string]string)
493 }
494 } else {
495 contentDispositionParams = make(map[string]string)
496 }
497
498 _, fileNameDefined := contentDispositionParams["filename"]
499 if !fileNameDefined {
500 // TODO : sanitize filename
501 contentDispositionParams["fileName"] = filepath.Base(url.Path)
502 }
503
504 return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
505}
506
[10]507func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]508 // serve robots.txt
[10]509 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
510 ctx.SetContentType("text/plain")
511 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
512 return true
513 }
[11]514
[67]515 // server favicon.ico
516 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
517 ctx.SetContentType("image/png")
518 ctx.Write(FAVICON_BYTES)
519 return true
520 }
521
[10]522 return false
523}
524
[1]525func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
526 param := ctx.QueryArgs().PeekBytes(paramName)
527
528 if param == nil {
529 param = ctx.PostArgs().PeekBytes(paramName)
[121]530 ctx.PostArgs().DelBytes(paramName)
[1]531 }
[121]532 ctx.QueryArgs().DelBytes(paramName)
[1]533
534 return param
535}
536
[9]537func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]538 // TODO
539
540 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
541
542 if urlSlices == nil {
[9]543 out.Write(css)
[1]544 return
545 }
546
547 startIndex := 0
548
549 for _, s := range urlSlices {
[15]550 urlStart := s[4]
551 urlEnd := s[5]
[1]552
[60]553 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]554 out.Write(css[startIndex:urlStart])
555 out.Write([]byte(uri))
[1]556 startIndex = urlEnd
[97]557 } else if DEBUG {
[36]558 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]559 }
560 }
561 if startIndex < len(css) {
[9]562 out.Write(css[startIndex:len(css)])
[1]563 }
564}
565
[9]566func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]567 r := bytes.NewReader(htmlDoc)
568 decoder := html.NewTokenizer(r)
569 decoder.AllowCDATA(true)
570
571 unsafeElements := make([][]byte, 0, 8)
572 state := STATE_DEFAULT
573 for {
574 token := decoder.Next()
575 if token == html.ErrorToken {
576 err := decoder.Err()
577 if err != io.EOF {
[97]578 log.Println("failed to parse HTML")
[1]579 }
580 break
581 }
582
583 if len(unsafeElements) == 0 {
584
585 switch token {
586 case html.StartTagToken, html.SelfClosingTagToken:
587 tag, hasAttrs := decoder.TagName()
588 safe := !inArray(tag, UNSAFE_ELEMENTS)
589 if !safe {
[116]590 if token != html.SelfClosingTagToken {
[1]591 var unsafeTag []byte = make([]byte, len(tag))
592 copy(unsafeTag, tag)
593 unsafeElements = append(unsafeElements, unsafeTag)
594 }
595 break
596 }
[38]597 if bytes.Equal(tag, []byte("base")) {
598 for {
599 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]600 if bytes.Equal(attrName, []byte("href")) {
601 parsedURI, err := url.Parse(string(attrValue))
602 if err == nil {
603 rc.BaseURL = parsedURI
604 }
[38]605 }
606 if !moreAttr {
607 break
608 }
609 }
610 break
611 }
[1]612 if bytes.Equal(tag, []byte("noscript")) {
613 state = STATE_IN_NOSCRIPT
614 break
615 }
616 var attrs [][][]byte
617 if hasAttrs {
618 for {
619 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]620 attrs = append(attrs, [][]byte{
621 attrName,
622 attrValue,
623 []byte(html.EscapeString(string(attrValue))),
624 })
[1]625 if !moreAttr {
626 break
627 }
628 }
[13]629 }
630 if bytes.Equal(tag, []byte("link")) {
631 sanitizeLinkTag(rc, out, attrs)
632 break
633 }
634
[45]635 if bytes.Equal(tag, []byte("meta")) {
636 sanitizeMetaTag(rc, out, attrs)
637 break
638 }
639
[13]640 fmt.Fprintf(out, "<%s", tag)
641
642 if hasAttrs {
[45]643 sanitizeAttrs(rc, out, attrs)
[1]644 }
[13]645
[1]646 if token == html.SelfClosingTagToken {
[9]647 fmt.Fprintf(out, " />")
[1]648 } else {
[9]649 fmt.Fprintf(out, ">")
[1]650 if bytes.Equal(tag, []byte("style")) {
651 state = STATE_IN_STYLE
652 }
653 }
[13]654
[45]655 if bytes.Equal(tag, []byte("head")) {
[46]656 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]657 }
658
[1]659 if bytes.Equal(tag, []byte("form")) {
660 var formURL *url.URL
661 for _, attr := range attrs {
662 if bytes.Equal(attr[0], []byte("action")) {
663 formURL, _ = url.Parse(string(attr[1]))
[28]664 formURL = mergeURIs(rc.BaseURL, formURL)
[1]665 break
666 }
667 }
668 if formURL == nil {
[23]669 formURL = rc.BaseURL
[1]670 }
[2]671 urlStr := formURL.String()
672 var key string
673 if rc.Key != nil {
674 key = hash(urlStr, rc.Key)
675 }
[121]676 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
677 if err != nil {
678 if DEBUG {
679 fmt.Println("failed to inject body extension", err)
680 }
681 }
[1]682 }
683
684 case html.EndTagToken:
685 tag, _ := decoder.TagName()
686 writeEndTag := true
687 switch string(tag) {
688 case "body":
[121]689 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
690 if len(rc.Key) > 0 {
691 p.HasMortyKey = true
692 }
693 err := HTML_BODY_EXTENSION.Execute(out, p)
694 if err != nil {
695 if DEBUG {
696 fmt.Println("failed to inject body extension", err)
697 }
698 }
[124]699 rc.BodyInjected = true
[1]700 case "style":
701 state = STATE_DEFAULT
702 case "noscript":
703 state = STATE_DEFAULT
704 writeEndTag = false
705 }
706 // skip noscript tags - only the tag, not the content, because javascript is sanitized
707 if writeEndTag {
[9]708 fmt.Fprintf(out, "</%s>", tag)
[1]709 }
710
711 case html.TextToken:
712 switch state {
713 case STATE_DEFAULT:
[9]714 fmt.Fprintf(out, "%s", decoder.Raw())
[1]715 case STATE_IN_STYLE:
[9]716 sanitizeCSS(rc, out, decoder.Raw())
[1]717 case STATE_IN_NOSCRIPT:
[9]718 sanitizeHTML(rc, out, decoder.Raw())
[1]719 }
720
[62]721 case html.CommentToken:
722 // ignore comment. TODO : parse IE conditional comment
723
724 case html.DoctypeToken:
[9]725 out.Write(decoder.Raw())
[1]726 }
727 } else {
728 switch token {
[116]729 case html.StartTagToken, html.SelfClosingTagToken:
[1]730 tag, _ := decoder.TagName()
731 if inArray(tag, UNSAFE_ELEMENTS) {
732 unsafeElements = append(unsafeElements, tag)
733 }
734
735 case html.EndTagToken:
736 tag, _ := decoder.TagName()
737 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
738 unsafeElements = unsafeElements[:len(unsafeElements)-1]
739 }
740 }
741 }
742 }
743}
744
[13]745func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
746 exclude := false
747 for _, attr := range attrs {
748 attrName := attr[0]
749 attrValue := attr[1]
750 if bytes.Equal(attrName, []byte("rel")) {
[46]751 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]752 exclude = true
753 break
754 }
755 }
756 if bytes.Equal(attrName, []byte("as")) {
757 if bytes.Equal(attrValue, []byte("script")) {
758 exclude = true
759 break
760 }
761 }
762 }
763
764 if !exclude {
765 out.Write([]byte("<link"))
766 for _, attr := range attrs {
[21]767 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]768 }
769 out.Write([]byte(">"))
770 }
771}
772
[45]773func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]774 var http_equiv []byte
775 var content []byte
776
777 for _, attr := range attrs {
778 attrName := attr[0]
779 attrValue := attr[1]
780 if bytes.Equal(attrName, []byte("http-equiv")) {
781 http_equiv = bytes.ToLower(attrValue)
[46]782 // exclude some <meta http-equiv="..." ..>
783 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
784 return
785 }
[1]786 }
787 if bytes.Equal(attrName, []byte("content")) {
788 content = attrValue
789 }
[45]790 if bytes.Equal(attrName, []byte("charset")) {
791 // exclude <meta charset="...">
792 return
793 }
[1]794 }
795
[45]796 out.Write([]byte("<meta"))
[14]797 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
798 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
799 contentUrl := content[urlIndex+4:]
[36]800 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]801 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]802 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]803 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]804 }
805 }
806 // output proxify result
[60]807 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]808 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]809 }
810 } else {
[46]811 if len(http_equiv) > 0 {
812 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
813 }
[9]814 sanitizeAttrs(rc, out, attrs)
[1]815 }
[45]816 out.Write([]byte(">"))
[1]817}
818
[9]819func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]820 for _, attr := range attrs {
[21]821 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]822 }
823}
824
[21]825func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]826 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]827 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]828 return
829 }
830 switch string(attrName) {
831 case "src", "href", "action":
[60]832 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]833 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]834 } else if DEBUG {
[36]835 log.Println("cannot proxify uri:", string(attrValue))
[1]836 }
837 case "style":
[21]838 cssAttr := bytes.NewBuffer(nil)
839 sanitizeCSS(rc, cssAttr, attrValue)
840 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]841 }
842}
843
[36]844func mergeURIs(u1, u2 *url.URL) *url.URL {
[71]845 if u2 == nil {
846 return u1
847 }
[28]848 return u1.ResolveReference(u2)
[1]849}
850
// sanitizeURI normalizes the start and end of uri and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are removed from the end of the
// URI, from its beginning, and from inside the scheme. The scheme is
// lower-cased. The second result is the scheme including its trailing ":",
// or "" when the URI has no scheme (a '/', '?', '\' or '#' comes first, or
// there is no ':' at all).
//
// The normalized scheme is written back into uri: the returned slice shares
// its memory with the argument, so only the scheme string is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		cleaned  = make([]byte, 0, 10) // significant bytes seen so far, lower-cased
		firstIdx = -1                  // index of the first significant byte
		colonIdx = -1                  // index of the ':' ending the scheme
	)

scan:
	for i, c := range uri {
		// ignore special characters and space
		if c <= 32 {
			continue
		}
		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		cleaned = append(cleaned, c)
		if firstIdx < 0 {
			firstIdx = i
		}

		switch c {
		case ':':
			// end of the scheme
			colonIdx = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonIdx < 0 {
		// scheme NOT found: only strip the leading special characters
		if firstIdx < 0 {
			firstIdx = 0
		}
		return uri[firstIdx:], ""
	}

	// scheme found: place the cleaned scheme so that it ends on the ':'
	start := colonIdx - len(cleaned) + 1
	copy(uri[start:], cleaned)
	return uri[start:], string(cleaned)
}
902
903func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
904 // sanitize URI
905 uri, scheme := sanitizeURI(uri)
906
[28]907 // remove javascript protocol
[60]908 if scheme == "javascript:" {
[28]909 return "", nil
910 }
[57]911
[1]912 // TODO check malicious data: - e.g. data:script
[60]913 if scheme == "data:" {
[61]914 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
915 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
916 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
917 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
918 bytes.HasPrefix(uri, []byte("data:image/webp")) {
919 // should be safe
920 return string(uri), nil
921 } else {
922 // unsafe data
923 return "", nil
924 }
[1]925 }
926
[57]927 // parse the uri
[60]928 u, err := url.Parse(string(uri))
[1]929 if err != nil {
930 return "", err
931 }
[57]932
933 // get the fragment (with the prefix "#")
934 fragment := ""
935 if len(u.Fragment) > 0 {
936 fragment = "#" + u.Fragment
937 }
938
939 // reset the fragment: it is not included in the mortyurl
940 u.Fragment = ""
941
942 // merge the URI with the document URI
[28]943 u = mergeURIs(rc.BaseURL, u)
[1]944
[57]945 // simple internal link ?
946 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
947 if u.Scheme == rc.BaseURL.Scheme &&
948 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
949 u.Host == rc.BaseURL.Host &&
950 u.Path == rc.BaseURL.Path &&
951 u.RawQuery == rc.BaseURL.RawQuery {
952 // the fragment is the only difference between the document URI and the uri parameter
953 return fragment, nil
954 }
955
956 // return full URI and fragment (if not empty)
[60]957 morty_uri := u.String()
[1]958
959 if rc.Key == nil {
[60]960 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]961 }
[60]962 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]963}
964
// inArray reports whether b is equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
973
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	io.WriteString(mac, msg)
	digest := mac.Sum(nil)
	return hex.EncodeToString(digest)
}
979
980func verifyRequestURI(uri, hashMsg, key []byte) bool {
981 h := make([]byte, hex.DecodedLen(len(hashMsg)))
982 _, err := hex.Decode(h, hashMsg)
983 if err != nil {
[97]984 if DEBUG {
985 log.Println("hmac error:", err)
986 }
[1]987 return false
988 }
989 mac := hmac.New(sha256.New, key)
990 mac.Write(uri)
991 return hmac.Equal(h, mac.Sum(nil))
992}
993
[69]994func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
995 ctx.SetContentType("text/html")
996 ctx.SetStatusCode(403)
997 ctx.Write([]byte(MORTY_HTML_PAGE_START))
998 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
999 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1000 ctx.Write([]byte(html.EscapeString(uri.String())))
1001 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1002 ctx.Write([]byte(html.EscapeString(uri.String())))
1003 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1004 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1005}
1006
[35]1007func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]1008 ctx.SetContentType("text/html; charset=UTF-8")
[35]1009 ctx.SetStatusCode(statusCode)
[69]1010 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]1011 if err != nil {
[97]1012 if DEBUG {
1013 log.Println("error:", err)
1014 }
[11]1015 ctx.Write([]byte("<h2>Error: "))
1016 ctx.Write([]byte(html.EscapeString(err.Error())))
1017 ctx.Write([]byte("</h2>"))
1018 }
[1]1019 if p.Key == nil {
1020 ctx.Write([]byte(`
[36]1021 <form action="post">
1022 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1023 <input type="submit" value="go" />
1024 </form>`))
[11]1025 } else {
1026 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]1027 }
[69]1028 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]1029}
1030
1031func main() {
[78]1032 default_listen_addr := os.Getenv("MORTY_ADDRESS")
1033 if default_listen_addr == "" {
1034 default_listen_addr = "127.0.0.1:3000"
1035 }
1036 default_key := os.Getenv("MORTY_KEY")
1037 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]1038 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]1039 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]1040 version := flag.Bool("version", false, "Show version")
[4]1041 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[109]1042 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
[1]1043 flag.Parse()
1044
[74]1045 if *version {
1046 fmt.Println(VERSION)
1047 return
1048 }
1049
[24]1050 if *ipv6 {
[109]1051 CLIENT.DialDualStack = true
[24]1052 }
1053
[109]1054 if *socks5 != "" {
1055 // this disables CLIENT.DialDualStack
1056 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1057 }
1058
[4]1059 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1060
1061 if *key != "" {
[92]1062 var err error
1063 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1064 if err != nil {
1065 log.Fatal("Error parsing -key", err.Error())
1066 os.Exit(1)
[92]1067 }
[1]1068 }
1069
1070 log.Println("listening on", *listen)
1071
1072 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1073 log.Fatal("Error in ListenAndServe:", err)
1074 }
1075}
Note: See TracBrowser for help on using the repository browser.