source: code/trunk/morty.go@ 127

Last change on this file since 127 was 127, checked in by asciimoo, 5 years ago

[mod] create own module for config

File size: 29.7 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
[121]12 "html/template"
[1]13 "io"
14 "log"
[68]15 "mime"
[1]16 "net/url"
[78]17 "os"
[68]18 "path/filepath"
[1]19 "regexp"
20 "strings"
[4]21 "time"
[60]22 "unicode/utf8"
[1]23
24 "github.com/valyala/fasthttp"
[109]25 "github.com/valyala/fasthttp/fasthttpproxy"
[1]26 "golang.org/x/net/html"
[45]27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
[68]29
[127]30 "github.com/asciimoo/morty/config"
[68]31 "github.com/asciimoo/morty/contenttype"
[1]32)
33
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	STATE_DEFAULT     int = iota // text is copied as-is
	STATE_IN_STYLE               // text is the body of a <style> element
	STATE_IN_NOSCRIPT            // text is the body of a <noscript> element
)

// VERSION is the version string printed by the -version flag.
const VERSION = "v0.2.0"
[74]41
[1]42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
[113]44 ReadBufferSize: 16 * 1024, // 16K
[1]45}
46
[127]47var cfg *config.Config = config.DefaultConfig
[1]48
[68]49var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
50 // html
51 contenttype.NewFilterEquals("text", "html", ""),
52 contenttype.NewFilterEquals("application", "xhtml", "xml"),
53 // css
54 contenttype.NewFilterEquals("text", "css", ""),
55 // images
56 contenttype.NewFilterEquals("image", "gif", ""),
57 contenttype.NewFilterEquals("image", "png", ""),
58 contenttype.NewFilterEquals("image", "jpeg", ""),
59 contenttype.NewFilterEquals("image", "pjpeg", ""),
60 contenttype.NewFilterEquals("image", "webp", ""),
61 contenttype.NewFilterEquals("image", "tiff", ""),
62 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
63 contenttype.NewFilterEquals("image", "bmp", ""),
64 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]65 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]66 // fonts
67 contenttype.NewFilterEquals("application", "font-otf", ""),
68 contenttype.NewFilterEquals("application", "font-ttf", ""),
69 contenttype.NewFilterEquals("application", "font-woff", ""),
70 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
71})
72
73var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
74 // texts
75 contenttype.NewFilterEquals("text", "csv", ""),
[103]76 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
[68]77 contenttype.NewFilterEquals("text", "plain", ""),
78 // API
79 contenttype.NewFilterEquals("application", "json", ""),
80 // Documents
81 contenttype.NewFilterEquals("application", "x-latex", ""),
82 contenttype.NewFilterEquals("application", "pdf", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
87 // Compressed archives
88 contenttype.NewFilterEquals("application", "zip", ""),
89 contenttype.NewFilterEquals("application", "gzip", ""),
90 contenttype.NewFilterEquals("application", "x-compressed", ""),
91 contenttype.NewFilterEquals("application", "x-gtar", ""),
92 contenttype.NewFilterEquals("application", "x-compress", ""),
93 // Generic binary
94 contenttype.NewFilterEquals("application", "octet-stream", ""),
95})
96
97var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
98 "charset": true,
99}
100
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}

// SAFE_ATTRIBUTES lists the attributes that sanitizeAttr copies to the
// output with only their value HTML-escaped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}

// LINK_REL_SAFE_VALUES lists the <link rel="..."> values that
// sanitizeLinkTag lets through; any other value drops the whole tag.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}

// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) <meta http-equiv="...">
// values that sanitizeMetaTag lets through.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
178
// CSS_URL_REGEXP finds url(...) references in CSS. Submatch 2 is the URL
// itself (without the optional surrounding quotes); sanitizeCSS relies on
// that group index. The "url" keyword is matched case-insensitively because
// CSS function names are case-insensitive, so "URL(...)" must be rewritten
// as well.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("(?i:url)\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
180
// Proxy is the request handler state: the optional HMAC key used to
// validate "mortyhash", and the timeout applied to upstream requests.
type Proxy struct {
	Key            []byte
	RequestTimeout time.Duration
}

// RequestConfig carries the per-document state used while rewriting URLs.
type RequestConfig struct {
	Key          []byte   // HMAC key used to sign proxified URLs (nil: no signature)
	BaseURL      *url.URL // URL against which relative references are resolved
	BodyInjected bool     // set once the body extension has been written
}

// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
// It is built positionally elsewhere in this file, so field order matters.
type HTMLBodyExtParam struct {
	BaseURL     string
	HasMortyKey bool
}

// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template.
// It is built positionally elsewhere in this file, so field order matters.
type HTMLFormExtParam struct {
	BaseURL   string
	MortyHash string
}
[1]201
// HTML_FORM_EXTENSION renders the hidden inputs appended to every <form>
// start tag by sanitizeHTML. It is parsed in init.
var HTML_FORM_EXTENSION *template.Template

// HTML_BODY_EXTENSION renders the Morty header bar injected into proxified
// pages. It is parsed in init.
var HTML_BODY_EXTENSION *template.Template

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// MORTY_HTML_PAGE_START is the opening part of the pages served by Morty
// itself (main page, error pages, exit page).
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes the markup opened by MORTY_HTML_PAGE_START.
var MORTY_HTML_PAGE_END string = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`

// FAVICON_BYTES is the PNG served for /favicon.ico; it is decoded in init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon and parses the two HTML templates.
// Any failure is a programming error in the embedded data, so it panics.
func init() {
	faviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	var err error
	// The literal carries no '=' padding, so it must be decoded with the
	// unpadded encoding: StdEncoding rejects the last group and silently
	// drops the final two bytes of the PNG when the error is ignored.
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		panic(err)
	}

	HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
		`<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
	if err != nil {
		panic(err)
	}

	HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
  <form method="get">
    <label for="mortytoggle">hide</label>
    <span><a href="/">Morty Proxy</a></span>
    <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
    This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
  </form>
</div>
<style>
body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
#mortyheader * { padding: 0; margin: 0; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
</style>
`)
	if err != nil {
		panic(err)
	}
}
281
[1]282func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]283
284 if appRequestHandler(ctx) {
285 return
286 }
287
[1]288 requestHash := popRequestParam(ctx, []byte("mortyhash"))
289
290 requestURI := popRequestParam(ctx, []byte("mortyurl"))
291
292 if requestURI == nil {
[35]293 p.serveMainPage(ctx, 200, nil)
[1]294 return
295 }
296
297 if p.Key != nil {
298 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]299 // HTTP status code 403 : Forbidden
300 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]301 return
302 }
303 }
304
[118]305 requestURIQuery := ctx.QueryArgs().QueryString()
306 if len(requestURIQuery) > 0 {
[125]307 if bytes.ContainsRune(requestURI, '?') {
308 requestURI = append(requestURI, '&')
309 } else {
310 requestURI = append(requestURI, '?')
311 }
[118]312 requestURI = append(requestURI, requestURIQuery...)
313 }
314
[97]315 parsedURI, err := url.Parse(string(requestURI))
[1]316
[11]317 if err != nil {
[35]318 // HTTP status code 500 : Internal Server Error
319 p.serveMainPage(ctx, 500, err)
[1]320 return
321 }
322
[120]323 if parsedURI.Scheme == "" {
324 parsedURI.Scheme = "https"
325 requestURI = append([]byte("https://"), requestURI...)
326 }
327
[69]328 // Serve an intermediate page for protocols other than HTTP(S)
329 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
330 p.serveExitMortyPage(ctx, parsedURI)
331 return
332 }
333
[1]334 req := fasthttp.AcquireRequest()
335 defer fasthttp.ReleaseRequest(req)
[12]336 req.SetConnectionClose()
[1]337
[47]338 requestURIStr := string(requestURI)
[1]339
[127]340 if cfg.Debug {
[97]341 log.Println("getting", requestURIStr)
342 }
[1]343
[47]344 req.SetRequestURI(requestURIStr)
[111]345 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
[1]346
347 resp := fasthttp.AcquireResponse()
348 defer fasthttp.ReleaseResponse(resp)
349
350 req.Header.SetMethodBytes(ctx.Method())
351 if ctx.IsPost() || ctx.IsPut() {
352 req.SetBody(ctx.PostBody())
353 }
354
[11]355 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
356
357 if err != nil {
[35]358 if err == fasthttp.ErrTimeout {
359 // HTTP status code 504 : Gateway Time-Out
360 p.serveMainPage(ctx, 504, err)
361 } else {
362 // HTTP status code 500 : Internal Server Error
363 p.serveMainPage(ctx, 500, err)
364 }
[1]365 return
366 }
367
368 if resp.StatusCode() != 200 {
369 switch resp.StatusCode() {
[7]370 case 301, 302, 303, 307, 308:
[1]371 loc := resp.Header.Peek("Location")
372 if loc != nil {
[97]373 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
374 url, err := rc.ProxifyURI(loc)
375 if err == nil {
376 ctx.SetStatusCode(resp.StatusCode())
377 ctx.Response.Header.Add("Location", url)
[127]378 if cfg.Debug {
[97]379 log.Println("redirect to", string(loc))
[96]380 }
[1]381 return
382 }
383 }
384 }
[47]385 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]386 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]387 return
388 }
389
[68]390 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]391
[68]392 if contentTypeBytes == nil {
[35]393 // HTTP status code 503 : Service Unavailable
394 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]395 return
396 }
397
[68]398 contentTypeString := string(contentTypeBytes)
399
400 // decode Content-Type header
401 contentType, error := contenttype.ParseContentType(contentTypeString)
402 if error != nil {
403 // HTTP status code 503 : Service Unavailable
404 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]405 return
406 }
407
[68]408 // content-disposition
409 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]410
[68]411 // check content type
412 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
413 // it is not a usual content type
414 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
415 // force attachment for allowed content type
416 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
417 } else {
418 // deny access to forbidden content type
419 // HTTP status code 403 : Forbidden
420 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
421 return
422 }
423 }
424
425 // HACK : replace */xhtml by text/html
426 if contentType.SubType == "xhtml" {
427 contentType.TopLevelType = "text"
428 contentType.SubType = "html"
429 contentType.Suffix = ""
430 }
431
432 // conversion to UTF-8
[1]433 var responseBody []byte
434
[68]435 if contentType.TopLevelType == "text" {
436 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]437 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
438 responseBody, err = e.NewDecoder().Bytes(resp.Body())
439 if err != nil {
440 // HTTP status code 503 : Service Unavailable
441 p.serveMainPage(ctx, 503, err)
442 return
443 }
444 } else {
445 responseBody = resp.Body()
[1]446 }
[68]447 // update the charset or specify it
448 contentType.Parameters["charset"] = "UTF-8"
[1]449 } else {
450 responseBody = resp.Body()
451 }
452
[68]453 //
454 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]455
[68]456 // set the content type
457 ctx.SetContentType(contentType.String())
458
459 // output according to MIME type
[1]460 switch {
[68]461 case contentType.SubType == "css" && contentType.Suffix == "":
[23]462 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]463 case contentType.SubType == "html" && contentType.Suffix == "":
[124]464 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
465 sanitizeHTML(rc, ctx, responseBody)
466 if !rc.BodyInjected {
467 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
468 if len(rc.Key) > 0 {
469 p.HasMortyKey = true
470 }
471 err := HTML_BODY_EXTENSION.Execute(ctx, p)
472 if err != nil {
[127]473 if cfg.Debug {
[124]474 fmt.Println("failed to inject body extension", err)
475 }
476 }
477 }
[1]478 default:
[68]479 if contentDispositionBytes != nil {
480 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]481 }
[1]482 ctx.Write(responseBody)
483 }
484}
485
// contentDispositionForceAttachment returns a Content-Disposition value whose
// disposition type is always "attachment".
//
// Parameters of the given header value (which may be nil or unparsable) are
// kept. When no "filename" parameter is present, the last element of the URL
// path is used, unless the path has no usable last element.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			contentDispositionParams = nil
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so the lookup and the
	// default both use the canonical "filename" key.
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// Base returns "." for an empty path and the separator for a root
		// path; neither is a meaningful file name.
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
508
[10]509func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]510 // serve robots.txt
[10]511 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
512 ctx.SetContentType("text/plain")
513 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
514 return true
515 }
[11]516
[67]517 // server favicon.ico
518 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
519 ctx.SetContentType("image/png")
520 ctx.Write(FAVICON_BYTES)
521 return true
522 }
523
[10]524 return false
525}
526
[1]527func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
528 param := ctx.QueryArgs().PeekBytes(paramName)
529
530 if param == nil {
531 param = ctx.PostArgs().PeekBytes(paramName)
[121]532 ctx.PostArgs().DelBytes(paramName)
[1]533 }
[121]534 ctx.QueryArgs().DelBytes(paramName)
[1]535
536 return param
537}
538
[9]539func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]540 // TODO
541
542 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
543
544 if urlSlices == nil {
[9]545 out.Write(css)
[1]546 return
547 }
548
549 startIndex := 0
550
551 for _, s := range urlSlices {
[15]552 urlStart := s[4]
553 urlEnd := s[5]
[1]554
[60]555 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]556 out.Write(css[startIndex:urlStart])
557 out.Write([]byte(uri))
[1]558 startIndex = urlEnd
[127]559 } else if cfg.Debug {
[36]560 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]561 }
562 }
563 if startIndex < len(css) {
[9]564 out.Write(css[startIndex:len(css)])
[1]565 }
566}
567
[9]568func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]569 r := bytes.NewReader(htmlDoc)
570 decoder := html.NewTokenizer(r)
571 decoder.AllowCDATA(true)
572
573 unsafeElements := make([][]byte, 0, 8)
574 state := STATE_DEFAULT
575 for {
576 token := decoder.Next()
577 if token == html.ErrorToken {
578 err := decoder.Err()
579 if err != io.EOF {
[97]580 log.Println("failed to parse HTML")
[1]581 }
582 break
583 }
584
585 if len(unsafeElements) == 0 {
586
587 switch token {
588 case html.StartTagToken, html.SelfClosingTagToken:
589 tag, hasAttrs := decoder.TagName()
590 safe := !inArray(tag, UNSAFE_ELEMENTS)
591 if !safe {
[116]592 if token != html.SelfClosingTagToken {
[1]593 var unsafeTag []byte = make([]byte, len(tag))
594 copy(unsafeTag, tag)
595 unsafeElements = append(unsafeElements, unsafeTag)
596 }
597 break
598 }
[38]599 if bytes.Equal(tag, []byte("base")) {
600 for {
601 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]602 if bytes.Equal(attrName, []byte("href")) {
603 parsedURI, err := url.Parse(string(attrValue))
604 if err == nil {
605 rc.BaseURL = parsedURI
606 }
[38]607 }
608 if !moreAttr {
609 break
610 }
611 }
612 break
613 }
[1]614 if bytes.Equal(tag, []byte("noscript")) {
615 state = STATE_IN_NOSCRIPT
616 break
617 }
618 var attrs [][][]byte
619 if hasAttrs {
620 for {
621 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]622 attrs = append(attrs, [][]byte{
623 attrName,
624 attrValue,
625 []byte(html.EscapeString(string(attrValue))),
626 })
[1]627 if !moreAttr {
628 break
629 }
630 }
[13]631 }
632 if bytes.Equal(tag, []byte("link")) {
633 sanitizeLinkTag(rc, out, attrs)
634 break
635 }
636
[45]637 if bytes.Equal(tag, []byte("meta")) {
638 sanitizeMetaTag(rc, out, attrs)
639 break
640 }
641
[13]642 fmt.Fprintf(out, "<%s", tag)
643
644 if hasAttrs {
[45]645 sanitizeAttrs(rc, out, attrs)
[1]646 }
[13]647
[1]648 if token == html.SelfClosingTagToken {
[9]649 fmt.Fprintf(out, " />")
[1]650 } else {
[9]651 fmt.Fprintf(out, ">")
[1]652 if bytes.Equal(tag, []byte("style")) {
653 state = STATE_IN_STYLE
654 }
655 }
[13]656
[45]657 if bytes.Equal(tag, []byte("head")) {
[46]658 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]659 }
660
[1]661 if bytes.Equal(tag, []byte("form")) {
662 var formURL *url.URL
663 for _, attr := range attrs {
664 if bytes.Equal(attr[0], []byte("action")) {
665 formURL, _ = url.Parse(string(attr[1]))
[28]666 formURL = mergeURIs(rc.BaseURL, formURL)
[1]667 break
668 }
669 }
670 if formURL == nil {
[23]671 formURL = rc.BaseURL
[1]672 }
[2]673 urlStr := formURL.String()
674 var key string
675 if rc.Key != nil {
676 key = hash(urlStr, rc.Key)
677 }
[121]678 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
679 if err != nil {
[127]680 if cfg.Debug {
[121]681 fmt.Println("failed to inject body extension", err)
682 }
683 }
[1]684 }
685
686 case html.EndTagToken:
687 tag, _ := decoder.TagName()
688 writeEndTag := true
689 switch string(tag) {
690 case "body":
[121]691 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
692 if len(rc.Key) > 0 {
693 p.HasMortyKey = true
694 }
695 err := HTML_BODY_EXTENSION.Execute(out, p)
696 if err != nil {
[127]697 if cfg.Debug {
[121]698 fmt.Println("failed to inject body extension", err)
699 }
700 }
[124]701 rc.BodyInjected = true
[1]702 case "style":
703 state = STATE_DEFAULT
704 case "noscript":
705 state = STATE_DEFAULT
706 writeEndTag = false
707 }
708 // skip noscript tags - only the tag, not the content, because javascript is sanitized
709 if writeEndTag {
[9]710 fmt.Fprintf(out, "</%s>", tag)
[1]711 }
712
713 case html.TextToken:
714 switch state {
715 case STATE_DEFAULT:
[9]716 fmt.Fprintf(out, "%s", decoder.Raw())
[1]717 case STATE_IN_STYLE:
[9]718 sanitizeCSS(rc, out, decoder.Raw())
[1]719 case STATE_IN_NOSCRIPT:
[9]720 sanitizeHTML(rc, out, decoder.Raw())
[1]721 }
722
[62]723 case html.CommentToken:
724 // ignore comment. TODO : parse IE conditional comment
725
726 case html.DoctypeToken:
[9]727 out.Write(decoder.Raw())
[1]728 }
729 } else {
730 switch token {
[116]731 case html.StartTagToken, html.SelfClosingTagToken:
[1]732 tag, _ := decoder.TagName()
733 if inArray(tag, UNSAFE_ELEMENTS) {
734 unsafeElements = append(unsafeElements, tag)
735 }
736
737 case html.EndTagToken:
738 tag, _ := decoder.TagName()
739 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
740 unsafeElements = unsafeElements[:len(unsafeElements)-1]
741 }
742 }
743 }
744 }
745}
746
[13]747func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
748 exclude := false
749 for _, attr := range attrs {
750 attrName := attr[0]
751 attrValue := attr[1]
752 if bytes.Equal(attrName, []byte("rel")) {
[46]753 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]754 exclude = true
755 break
756 }
757 }
758 if bytes.Equal(attrName, []byte("as")) {
759 if bytes.Equal(attrValue, []byte("script")) {
760 exclude = true
761 break
762 }
763 }
764 }
765
766 if !exclude {
767 out.Write([]byte("<link"))
768 for _, attr := range attrs {
[21]769 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]770 }
771 out.Write([]byte(">"))
772 }
773}
774
[45]775func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]776 var http_equiv []byte
777 var content []byte
778
779 for _, attr := range attrs {
780 attrName := attr[0]
781 attrValue := attr[1]
782 if bytes.Equal(attrName, []byte("http-equiv")) {
783 http_equiv = bytes.ToLower(attrValue)
[46]784 // exclude some <meta http-equiv="..." ..>
785 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
786 return
787 }
[1]788 }
789 if bytes.Equal(attrName, []byte("content")) {
790 content = attrValue
791 }
[45]792 if bytes.Equal(attrName, []byte("charset")) {
793 // exclude <meta charset="...">
794 return
795 }
[1]796 }
797
[45]798 out.Write([]byte("<meta"))
[14]799 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
800 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
801 contentUrl := content[urlIndex+4:]
[36]802 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]803 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]804 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]805 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]806 }
807 }
808 // output proxify result
[60]809 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]810 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]811 }
812 } else {
[46]813 if len(http_equiv) > 0 {
814 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
815 }
[9]816 sanitizeAttrs(rc, out, attrs)
[1]817 }
[45]818 out.Write([]byte(">"))
[1]819}
820
[9]821func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]822 for _, attr := range attrs {
[21]823 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]824 }
825}
826
[21]827func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]828 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]829 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]830 return
831 }
832 switch string(attrName) {
833 case "src", "href", "action":
[60]834 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]835 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[127]836 } else if cfg.Debug {
[36]837 log.Println("cannot proxify uri:", string(attrValue))
[1]838 }
839 case "style":
[21]840 cssAttr := bytes.NewBuffer(nil)
841 sanitizeCSS(rc, cssAttr, attrValue)
842 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]843 }
844}
845
// mergeURIs resolves the reference u2 against the base u1.
// A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
852
// sanitizeURI strips the bytes <= 32 (control characters and space) found at
// the beginning and end of uri and, when a scheme is present, removes such
// bytes from inside the scheme and lower-cases it.
//
// It returns the cleaned URI and the scheme including its trailing ":"
// (empty when no scheme was found). The cleaned scheme is written back into
// uri's backing array, so the caller's slice is modified; the only
// allocation is the scheme buffer.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		cleaned  = bytes.NewBuffer(make([]byte, 0, 10)) // kept bytes, lower-cased
		startIdx = 0                                    // index of the first kept byte
		started  = false
		colonIdx = -1 // index of the ':' ending the scheme
	)

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// ignore special characters and space
			continue
		}
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		cleaned.WriteByte(ch)

		if !started {
			startIdx = idx
			started = true
		}

		switch ch {
		case ':':
			// end of the scheme
			colonIdx = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIdx == -1 {
		// scheme NOT found
		return uri[startIdx:], ""
	}

	// Overwrite the bytes just before and including ':' with the cleaned
	// scheme, so that the returned slice starts with it.
	schemeStart := colonIdx - cleaned.Len() + 1
	copy(uri[schemeStart:], cleaned.Bytes())
	return uri[schemeStart:], cleaned.String()
}
904
905func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
906 // sanitize URI
907 uri, scheme := sanitizeURI(uri)
908
[28]909 // remove javascript protocol
[60]910 if scheme == "javascript:" {
[28]911 return "", nil
912 }
[57]913
[1]914 // TODO check malicious data: - e.g. data:script
[60]915 if scheme == "data:" {
[61]916 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
917 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
918 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
919 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
920 bytes.HasPrefix(uri, []byte("data:image/webp")) {
921 // should be safe
922 return string(uri), nil
923 } else {
924 // unsafe data
925 return "", nil
926 }
[1]927 }
928
[57]929 // parse the uri
[60]930 u, err := url.Parse(string(uri))
[1]931 if err != nil {
932 return "", err
933 }
[57]934
935 // get the fragment (with the prefix "#")
936 fragment := ""
937 if len(u.Fragment) > 0 {
938 fragment = "#" + u.Fragment
939 }
940
941 // reset the fragment: it is not included in the mortyurl
942 u.Fragment = ""
943
944 // merge the URI with the document URI
[28]945 u = mergeURIs(rc.BaseURL, u)
[1]946
[57]947 // simple internal link ?
948 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
949 if u.Scheme == rc.BaseURL.Scheme &&
950 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
951 u.Host == rc.BaseURL.Host &&
952 u.Path == rc.BaseURL.Path &&
953 u.RawQuery == rc.BaseURL.RawQuery {
954 // the fragment is the only difference between the document URI and the uri parameter
955 return fragment, nil
956 }
957
958 // return full URI and fragment (if not empty)
[60]959 morty_uri := u.String()
[1]960
961 if rc.Key == nil {
[60]962 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]963 }
[60]964 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]965}
966
// inArray reports whether b is byte-for-byte equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
975
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
981
982func verifyRequestURI(uri, hashMsg, key []byte) bool {
983 h := make([]byte, hex.DecodedLen(len(hashMsg)))
984 _, err := hex.Decode(h, hashMsg)
985 if err != nil {
[127]986 if cfg.Debug {
[97]987 log.Println("hmac error:", err)
988 }
[1]989 return false
990 }
991 mac := hmac.New(sha256.New, key)
992 mac.Write(uri)
993 return hmac.Equal(h, mac.Sum(nil))
994}
995
[69]996func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
997 ctx.SetContentType("text/html")
998 ctx.SetStatusCode(403)
999 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1000 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
1001 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1002 ctx.Write([]byte(html.EscapeString(uri.String())))
1003 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1004 ctx.Write([]byte(html.EscapeString(uri.String())))
1005 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1006 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1007}
1008
[35]1009func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]1010 ctx.SetContentType("text/html; charset=UTF-8")
[35]1011 ctx.SetStatusCode(statusCode)
[69]1012 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]1013 if err != nil {
[127]1014 if cfg.Debug {
[97]1015 log.Println("error:", err)
1016 }
[11]1017 ctx.Write([]byte("<h2>Error: "))
1018 ctx.Write([]byte(html.EscapeString(err.Error())))
1019 ctx.Write([]byte("</h2>"))
1020 }
[1]1021 if p.Key == nil {
1022 ctx.Write([]byte(`
[36]1023 <form action="post">
1024 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1025 <input type="submit" value="go" />
1026 </form>`))
[11]1027 } else {
1028 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]1029 }
[69]1030 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]1031}
1032
1033func main() {
[127]1034 cfg.ListenAddress = *flag.String("listen", cfg.ListenAddress, "Listen address")
1035 cfg.Key = *flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1036 cfg.IPV6 = *flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1037 cfg.Debug = *flag.Bool("debug", cfg.Debug, "Debug mode")
1038 cfg.RequestTimeout = *flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
[74]1039 version := flag.Bool("version", false, "Show version")
[4]1040 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[109]1041 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
[1]1042 flag.Parse()
1043
[74]1044 if *version {
1045 fmt.Println(VERSION)
1046 return
1047 }
1048
[24]1049 if *ipv6 {
[109]1050 CLIENT.DialDualStack = true
[24]1051 }
1052
[109]1053 if *socks5 != "" {
1054 // this disables CLIENT.DialDualStack
1055 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1056 }
[127]1057 if cfg.IPV6 {
1058 CLIENT.Dial = fasthttp.DialDualStack
1059 }
[109]1060
[127]1061 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second}
[1]1062
[127]1063 if cfg.Key != "" {
[92]1064 var err error
[127]1065 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
[94]1066 if err != nil {
1067 log.Fatal("Error parsing -key", err.Error())
1068 os.Exit(1)
[92]1069 }
[1]1070 }
1071
[127]1072 log.Println("listening on", cfg.ListenAddress)
[1]1073
[127]1074 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
[1]1075 log.Fatal("Error in ListenAndServe:", err)
1076 }
1077}
Note: See TracBrowser for help on using the repository browser.