source: code/trunk/morty.go@ 126

Last change on this file since 126 was 126, checked in by asciimoo, 5 years ago

[fix] add default css reset parameters to topbar

File size: 29.5 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
[121]12 "html/template"
[1]13 "io"
14 "log"
[68]15 "mime"
[1]16 "net/url"
[78]17 "os"
[68]18 "path/filepath"
[1]19 "regexp"
20 "strings"
[4]21 "time"
[60]22 "unicode/utf8"
[1]23
24 "github.com/valyala/fasthttp"
[109]25 "github.com/valyala/fasthttp/fasthttpproxy"
[1]26 "golang.org/x/net/html"
[45]27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
[68]29
30 "github.com/asciimoo/morty/contenttype"
[1]31)
32
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output as they are.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are the body of a <style> element and are
	// passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are the body of a <noscript> element and
	// are passed through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
38
// VERSION is the release string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on diagnostic logging. It is enabled unless the DEBUG
// environment variable is set to exactly "false".
var DEBUG bool = !(os.Getenv("DEBUG") == "false")
[96]42
[1]43var CLIENT *fasthttp.Client = &fasthttp.Client{
44 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
[113]45 ReadBufferSize: 16 * 1024, // 16K
[1]46}
47
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening and closing quote, submatch 2 is the URL itself.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]49
[68]50var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
51 // html
52 contenttype.NewFilterEquals("text", "html", ""),
53 contenttype.NewFilterEquals("application", "xhtml", "xml"),
54 // css
55 contenttype.NewFilterEquals("text", "css", ""),
56 // images
57 contenttype.NewFilterEquals("image", "gif", ""),
58 contenttype.NewFilterEquals("image", "png", ""),
59 contenttype.NewFilterEquals("image", "jpeg", ""),
60 contenttype.NewFilterEquals("image", "pjpeg", ""),
61 contenttype.NewFilterEquals("image", "webp", ""),
62 contenttype.NewFilterEquals("image", "tiff", ""),
63 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
64 contenttype.NewFilterEquals("image", "bmp", ""),
65 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]66 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]67 // fonts
68 contenttype.NewFilterEquals("application", "font-otf", ""),
69 contenttype.NewFilterEquals("application", "font-ttf", ""),
70 contenttype.NewFilterEquals("application", "font-woff", ""),
71 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
72})
73
74var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
75 // texts
76 contenttype.NewFilterEquals("text", "csv", ""),
[103]77 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
[68]78 contenttype.NewFilterEquals("text", "plain", ""),
79 // API
80 contenttype.NewFilterEquals("application", "json", ""),
81 // Documents
82 contenttype.NewFilterEquals("application", "x-latex", ""),
83 contenttype.NewFilterEquals("application", "pdf", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
88 // Compressed archives
89 contenttype.NewFilterEquals("application", "zip", ""),
90 contenttype.NewFilterEquals("application", "gzip", ""),
91 contenttype.NewFilterEquals("application", "x-compressed", ""),
92 contenttype.NewFilterEquals("application", "x-gtar", ""),
93 contenttype.NewFilterEquals("application", "x-compress", ""),
94 // Generic binary
95 contenttype.NewFilterEquals("application", "octet-stream", ""),
96})
97
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept when the header is rebuilt for the client; only "charset" is kept.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
101
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
111
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only their value HTML-escaped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
148
// LINK_REL_SAFE_VALUES lists the rel values a <link> element may carry; any
// other rel value makes sanitizeLinkTag drop the element.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
170
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-case) http-equiv values that
// sanitizeMetaTag lets through; a <meta> with any other value is dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped here.
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // the target URL is rewritten
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
179
// Proxy holds the process-wide settings of the proxy server.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; nil disables verification.
	Key []byte
	// RequestTimeout bounds each upstream request made by RequestHandler.
	RequestTimeout time.Duration
}
184
// RequestConfig carries the per-document state used while rewriting one
// fetched resource.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// BaseURL is the URL against which relative references are resolved.
	BaseURL *url.URL
	// BodyInjected is set once the body extension has been written at </body>.
	BodyInjected bool
}
190
// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
// The field order matters: callers build it with positional literals.
type HTMLBodyExtParam struct {
	// BaseURL is shown in the top bar and used for the "original site" link.
	BaseURL string
	// HasMortyKey marks the URL input as read-only in the template.
	HasMortyKey bool
}
[1]195
// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template.
// The field order matters: callers build it with positional literals.
type HTMLFormExtParam struct {
	// BaseURL is written into the hidden "mortyurl" input.
	BaseURL string
	// MortyHash is written into the hidden "mortyhash" input when non-empty.
	MortyHash string
}
[1]200
var (
	// HTML_FORM_EXTENSION renders the hidden inputs added to every <form>.
	// It is parsed in init.
	HTML_FORM_EXTENSION *template.Template

	// HTML_BODY_EXTENSION renders the top bar added to every proxified page.
	// It is parsed in init.
	HTML_BODY_EXTENSION *template.Template

	// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag.
	HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
)
[45]207
// MORTY_HTML_PAGE_START is the opening part of the pages generated by the
// proxy itself (main page, error pages and the exit page). It ends inside the
// "container" div, which MORTY_HTML_PAGE_END closes.
//
// The link rule declares "color: #2980b9"; the bare "#2980b9;" without a
// property name was an invalid declaration.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
230
// MORTY_HTML_PAGE_END closes the "container" div opened by
// MORTY_HTML_PAGE_START, adds the footer and ends the document.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
240
[67]241var FAVICON_BYTES []byte
242
243func init() {
244 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
245
246 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
[121]247 var err error
248 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
[123]249 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
[121]250 if err != nil {
251 panic(err)
252 }
253 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
254<input type="checkbox" id="mortytoggle" autocomplete="off" />
255<div id="mortyheader">
256 <form method="get">
257 <label for="mortytoggle">hide</label>
258 <span><a href="/">Morty Proxy</a></span>
259 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
260 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
261 </form>
262</div>
263<style>
264body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
265#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
[126]266#mortyheader * { padding: 0; margin: 0; }
[121]267#mortyheader p { padding: 0 0 0.7em 0; display: block; }
268#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
269#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
270#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
271input[type=checkbox]#mortytoggle { display: none; }
272input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
273#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
274</style>
275`)
276 if err != nil {
277 panic(err)
278 }
[67]279}
280
[1]281func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]282
283 if appRequestHandler(ctx) {
284 return
285 }
286
[1]287 requestHash := popRequestParam(ctx, []byte("mortyhash"))
288
289 requestURI := popRequestParam(ctx, []byte("mortyurl"))
290
291 if requestURI == nil {
[35]292 p.serveMainPage(ctx, 200, nil)
[1]293 return
294 }
295
296 if p.Key != nil {
297 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]298 // HTTP status code 403 : Forbidden
299 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]300 return
301 }
302 }
303
[118]304 requestURIQuery := ctx.QueryArgs().QueryString()
305 if len(requestURIQuery) > 0 {
[125]306 if bytes.ContainsRune(requestURI, '?') {
307 requestURI = append(requestURI, '&')
308 } else {
309 requestURI = append(requestURI, '?')
310 }
[118]311 requestURI = append(requestURI, requestURIQuery...)
312 }
313
[97]314 parsedURI, err := url.Parse(string(requestURI))
[1]315
[11]316 if err != nil {
[35]317 // HTTP status code 500 : Internal Server Error
318 p.serveMainPage(ctx, 500, err)
[1]319 return
320 }
321
[120]322 if parsedURI.Scheme == "" {
323 parsedURI.Scheme = "https"
324 requestURI = append([]byte("https://"), requestURI...)
325 }
326
[69]327 // Serve an intermediate page for protocols other than HTTP(S)
328 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
329 p.serveExitMortyPage(ctx, parsedURI)
330 return
331 }
332
[1]333 req := fasthttp.AcquireRequest()
334 defer fasthttp.ReleaseRequest(req)
[12]335 req.SetConnectionClose()
[1]336
[47]337 requestURIStr := string(requestURI)
[1]338
[97]339 if DEBUG {
340 log.Println("getting", requestURIStr)
341 }
[1]342
[47]343 req.SetRequestURI(requestURIStr)
[111]344 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
[1]345
346 resp := fasthttp.AcquireResponse()
347 defer fasthttp.ReleaseResponse(resp)
348
349 req.Header.SetMethodBytes(ctx.Method())
350 if ctx.IsPost() || ctx.IsPut() {
351 req.SetBody(ctx.PostBody())
352 }
353
[11]354 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
355
356 if err != nil {
[35]357 if err == fasthttp.ErrTimeout {
358 // HTTP status code 504 : Gateway Time-Out
359 p.serveMainPage(ctx, 504, err)
360 } else {
361 // HTTP status code 500 : Internal Server Error
362 p.serveMainPage(ctx, 500, err)
363 }
[1]364 return
365 }
366
367 if resp.StatusCode() != 200 {
368 switch resp.StatusCode() {
[7]369 case 301, 302, 303, 307, 308:
[1]370 loc := resp.Header.Peek("Location")
371 if loc != nil {
[97]372 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
373 url, err := rc.ProxifyURI(loc)
374 if err == nil {
375 ctx.SetStatusCode(resp.StatusCode())
376 ctx.Response.Header.Add("Location", url)
377 if DEBUG {
378 log.Println("redirect to", string(loc))
[96]379 }
[1]380 return
381 }
382 }
383 }
[47]384 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]385 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]386 return
387 }
388
[68]389 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]390
[68]391 if contentTypeBytes == nil {
[35]392 // HTTP status code 503 : Service Unavailable
393 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]394 return
395 }
396
[68]397 contentTypeString := string(contentTypeBytes)
398
399 // decode Content-Type header
400 contentType, error := contenttype.ParseContentType(contentTypeString)
401 if error != nil {
402 // HTTP status code 503 : Service Unavailable
403 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]404 return
405 }
406
[68]407 // content-disposition
408 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]409
[68]410 // check content type
411 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
412 // it is not a usual content type
413 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
414 // force attachment for allowed content type
415 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
416 } else {
417 // deny access to forbidden content type
418 // HTTP status code 403 : Forbidden
419 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
420 return
421 }
422 }
423
424 // HACK : replace */xhtml by text/html
425 if contentType.SubType == "xhtml" {
426 contentType.TopLevelType = "text"
427 contentType.SubType = "html"
428 contentType.Suffix = ""
429 }
430
431 // conversion to UTF-8
[1]432 var responseBody []byte
433
[68]434 if contentType.TopLevelType == "text" {
435 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]436 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
437 responseBody, err = e.NewDecoder().Bytes(resp.Body())
438 if err != nil {
439 // HTTP status code 503 : Service Unavailable
440 p.serveMainPage(ctx, 503, err)
441 return
442 }
443 } else {
444 responseBody = resp.Body()
[1]445 }
[68]446 // update the charset or specify it
447 contentType.Parameters["charset"] = "UTF-8"
[1]448 } else {
449 responseBody = resp.Body()
450 }
451
[68]452 //
453 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]454
[68]455 // set the content type
456 ctx.SetContentType(contentType.String())
457
458 // output according to MIME type
[1]459 switch {
[68]460 case contentType.SubType == "css" && contentType.Suffix == "":
[23]461 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]462 case contentType.SubType == "html" && contentType.Suffix == "":
[124]463 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
464 sanitizeHTML(rc, ctx, responseBody)
465 if !rc.BodyInjected {
466 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
467 if len(rc.Key) > 0 {
468 p.HasMortyKey = true
469 }
470 err := HTML_BODY_EXTENSION.Execute(ctx, p)
471 if err != nil {
472 if DEBUG {
473 fmt.Println("failed to inject body extension", err)
474 }
475 }
476 }
[1]477 default:
[68]478 if contentDispositionBytes != nil {
479 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]480 }
[1]481 ctx.Write(responseBody)
482 }
483}
484
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of the incoming header (contentDispositionBytes, may be nil) are
// kept when it parses; otherwise they are discarded. When no "filename"
// parameter is present, the last element of url.Path is used, unless the path
// is empty or "/" and therefore yields no usable name. If the value cannot be
// formatted, the bare "attachment" is returned.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	contentDispositionParams := make(map[string]string)

	if contentDispositionBytes != nil {
		if _, params, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && params != nil {
			contentDispositionParams = params
		}
	}

	// mime.ParseMediaType lower-cases parameter names, so the lookup and the
	// insertion must both use "filename".
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	formatted := mime.FormatMediaType("attachment", contentDispositionParams)
	if formatted == "" {
		// FormatMediaType returns "" when it cannot encode a parameter.
		formatted = "attachment"
	}
	return []byte(formatted)
}
507
[10]508func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]509 // serve robots.txt
[10]510 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
511 ctx.SetContentType("text/plain")
512 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
513 return true
514 }
[11]515
[67]516 // server favicon.ico
517 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
518 ctx.SetContentType("image/png")
519 ctx.Write(FAVICON_BYTES)
520 return true
521 }
522
[10]523 return false
524}
525
[1]526func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
527 param := ctx.QueryArgs().PeekBytes(paramName)
528
529 if param == nil {
530 param = ctx.PostArgs().PeekBytes(paramName)
[121]531 ctx.PostArgs().DelBytes(paramName)
[1]532 }
[121]533 ctx.QueryArgs().DelBytes(paramName)
[1]534
535 return param
536}
537
[9]538func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]539 // TODO
540
541 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
542
543 if urlSlices == nil {
[9]544 out.Write(css)
[1]545 return
546 }
547
548 startIndex := 0
549
550 for _, s := range urlSlices {
[15]551 urlStart := s[4]
552 urlEnd := s[5]
[1]553
[60]554 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]555 out.Write(css[startIndex:urlStart])
556 out.Write([]byte(uri))
[1]557 startIndex = urlEnd
[97]558 } else if DEBUG {
[36]559 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]560 }
561 }
562 if startIndex < len(css) {
[9]563 out.Write(css[startIndex:len(css)])
[1]564 }
565}
566
[9]567func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]568 r := bytes.NewReader(htmlDoc)
569 decoder := html.NewTokenizer(r)
570 decoder.AllowCDATA(true)
571
572 unsafeElements := make([][]byte, 0, 8)
573 state := STATE_DEFAULT
574 for {
575 token := decoder.Next()
576 if token == html.ErrorToken {
577 err := decoder.Err()
578 if err != io.EOF {
[97]579 log.Println("failed to parse HTML")
[1]580 }
581 break
582 }
583
584 if len(unsafeElements) == 0 {
585
586 switch token {
587 case html.StartTagToken, html.SelfClosingTagToken:
588 tag, hasAttrs := decoder.TagName()
589 safe := !inArray(tag, UNSAFE_ELEMENTS)
590 if !safe {
[116]591 if token != html.SelfClosingTagToken {
[1]592 var unsafeTag []byte = make([]byte, len(tag))
593 copy(unsafeTag, tag)
594 unsafeElements = append(unsafeElements, unsafeTag)
595 }
596 break
597 }
[38]598 if bytes.Equal(tag, []byte("base")) {
599 for {
600 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]601 if bytes.Equal(attrName, []byte("href")) {
602 parsedURI, err := url.Parse(string(attrValue))
603 if err == nil {
604 rc.BaseURL = parsedURI
605 }
[38]606 }
607 if !moreAttr {
608 break
609 }
610 }
611 break
612 }
[1]613 if bytes.Equal(tag, []byte("noscript")) {
614 state = STATE_IN_NOSCRIPT
615 break
616 }
617 var attrs [][][]byte
618 if hasAttrs {
619 for {
620 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]621 attrs = append(attrs, [][]byte{
622 attrName,
623 attrValue,
624 []byte(html.EscapeString(string(attrValue))),
625 })
[1]626 if !moreAttr {
627 break
628 }
629 }
[13]630 }
631 if bytes.Equal(tag, []byte("link")) {
632 sanitizeLinkTag(rc, out, attrs)
633 break
634 }
635
[45]636 if bytes.Equal(tag, []byte("meta")) {
637 sanitizeMetaTag(rc, out, attrs)
638 break
639 }
640
[13]641 fmt.Fprintf(out, "<%s", tag)
642
643 if hasAttrs {
[45]644 sanitizeAttrs(rc, out, attrs)
[1]645 }
[13]646
[1]647 if token == html.SelfClosingTagToken {
[9]648 fmt.Fprintf(out, " />")
[1]649 } else {
[9]650 fmt.Fprintf(out, ">")
[1]651 if bytes.Equal(tag, []byte("style")) {
652 state = STATE_IN_STYLE
653 }
654 }
[13]655
[45]656 if bytes.Equal(tag, []byte("head")) {
[46]657 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]658 }
659
[1]660 if bytes.Equal(tag, []byte("form")) {
661 var formURL *url.URL
662 for _, attr := range attrs {
663 if bytes.Equal(attr[0], []byte("action")) {
664 formURL, _ = url.Parse(string(attr[1]))
[28]665 formURL = mergeURIs(rc.BaseURL, formURL)
[1]666 break
667 }
668 }
669 if formURL == nil {
[23]670 formURL = rc.BaseURL
[1]671 }
[2]672 urlStr := formURL.String()
673 var key string
674 if rc.Key != nil {
675 key = hash(urlStr, rc.Key)
676 }
[121]677 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
678 if err != nil {
679 if DEBUG {
680 fmt.Println("failed to inject body extension", err)
681 }
682 }
[1]683 }
684
685 case html.EndTagToken:
686 tag, _ := decoder.TagName()
687 writeEndTag := true
688 switch string(tag) {
689 case "body":
[121]690 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
691 if len(rc.Key) > 0 {
692 p.HasMortyKey = true
693 }
694 err := HTML_BODY_EXTENSION.Execute(out, p)
695 if err != nil {
696 if DEBUG {
697 fmt.Println("failed to inject body extension", err)
698 }
699 }
[124]700 rc.BodyInjected = true
[1]701 case "style":
702 state = STATE_DEFAULT
703 case "noscript":
704 state = STATE_DEFAULT
705 writeEndTag = false
706 }
707 // skip noscript tags - only the tag, not the content, because javascript is sanitized
708 if writeEndTag {
[9]709 fmt.Fprintf(out, "</%s>", tag)
[1]710 }
711
712 case html.TextToken:
713 switch state {
714 case STATE_DEFAULT:
[9]715 fmt.Fprintf(out, "%s", decoder.Raw())
[1]716 case STATE_IN_STYLE:
[9]717 sanitizeCSS(rc, out, decoder.Raw())
[1]718 case STATE_IN_NOSCRIPT:
[9]719 sanitizeHTML(rc, out, decoder.Raw())
[1]720 }
721
[62]722 case html.CommentToken:
723 // ignore comment. TODO : parse IE conditional comment
724
725 case html.DoctypeToken:
[9]726 out.Write(decoder.Raw())
[1]727 }
728 } else {
729 switch token {
[116]730 case html.StartTagToken, html.SelfClosingTagToken:
[1]731 tag, _ := decoder.TagName()
732 if inArray(tag, UNSAFE_ELEMENTS) {
733 unsafeElements = append(unsafeElements, tag)
734 }
735
736 case html.EndTagToken:
737 tag, _ := decoder.TagName()
738 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
739 unsafeElements = unsafeElements[:len(unsafeElements)-1]
740 }
741 }
742 }
743 }
744}
745
[13]746func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
747 exclude := false
748 for _, attr := range attrs {
749 attrName := attr[0]
750 attrValue := attr[1]
751 if bytes.Equal(attrName, []byte("rel")) {
[46]752 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]753 exclude = true
754 break
755 }
756 }
757 if bytes.Equal(attrName, []byte("as")) {
758 if bytes.Equal(attrValue, []byte("script")) {
759 exclude = true
760 break
761 }
762 }
763 }
764
765 if !exclude {
766 out.Write([]byte("<link"))
767 for _, attr := range attrs {
[21]768 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]769 }
770 out.Write([]byte(">"))
771 }
772}
773
[45]774func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]775 var http_equiv []byte
776 var content []byte
777
778 for _, attr := range attrs {
779 attrName := attr[0]
780 attrValue := attr[1]
781 if bytes.Equal(attrName, []byte("http-equiv")) {
782 http_equiv = bytes.ToLower(attrValue)
[46]783 // exclude some <meta http-equiv="..." ..>
784 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
785 return
786 }
[1]787 }
788 if bytes.Equal(attrName, []byte("content")) {
789 content = attrValue
790 }
[45]791 if bytes.Equal(attrName, []byte("charset")) {
792 // exclude <meta charset="...">
793 return
794 }
[1]795 }
796
[45]797 out.Write([]byte("<meta"))
[14]798 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
799 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
800 contentUrl := content[urlIndex+4:]
[36]801 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]802 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]803 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]804 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]805 }
806 }
807 // output proxify result
[60]808 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]809 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]810 }
811 } else {
[46]812 if len(http_equiv) > 0 {
813 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
814 }
[9]815 sanitizeAttrs(rc, out, attrs)
[1]816 }
[45]817 out.Write([]byte(">"))
[1]818}
819
[9]820func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]821 for _, attr := range attrs {
[21]822 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]823 }
824}
825
[21]826func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]827 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]828 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]829 return
830 }
831 switch string(attrName) {
832 case "src", "href", "action":
[60]833 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]834 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]835 } else if DEBUG {
[36]836 log.Println("cannot proxify uri:", string(attrValue))
[1]837 }
838 case "style":
[21]839 cssAttr := bytes.NewBuffer(nil)
840 sanitizeCSS(rc, cssAttr, attrValue)
841 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]842 }
843}
844
// mergeURIs resolves the reference u2 against the base u1 and returns the
// result. When u2 is nil, u1 itself is returned.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
851
// sanitizeURI cleans up a URI and extracts its scheme.
//
// Trailing bytes <= 0x20 are trimmed. The URI is then scanned up to the first
// ':', '/', '?', '\' or '#', ignoring bytes <= 0x20 and lower-casing ASCII
// letters. If the scan stops at ':', that cleaned text is the scheme: it is
// written back into uri, right-aligned on the ':', and the function returns
// the URI from the start of the cleaned scheme together with the scheme,
// colon included. Otherwise the URI is returned from its first byte > 0x20,
// with an empty scheme.
//
// The input slice is modified in place; only the scheme is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and control characters
	uri = bytes.TrimRightFunc(uri, func(r rune) bool { return r <= ' ' })

	scheme := make([]byte, 0, 10)
	firstIndex := -1
	colonIndex := -1

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= ' ' {
			// ignore space and control characters
			continue
		}
		if c < utf8.RuneSelf && c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		scheme = append(scheme, c)
		if firstIndex < 0 {
			firstIndex = i
		}

		switch c {
		case ':':
			// the scheme ends here
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex < 0 {
		// scheme NOT found
		if firstIndex < 0 {
			firstIndex = 0
		}
		return uri[firstIndex:], ""
	}

	// Overwrite the bytes just before the ':' with the cleaned scheme.
	start := colonIndex + 1 - len(scheme)
	copy(uri[start:], scheme)
	return uri[start:], string(scheme)
}
903
904func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
905 // sanitize URI
906 uri, scheme := sanitizeURI(uri)
907
[28]908 // remove javascript protocol
[60]909 if scheme == "javascript:" {
[28]910 return "", nil
911 }
[57]912
[1]913 // TODO check malicious data: - e.g. data:script
[60]914 if scheme == "data:" {
[61]915 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
916 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
917 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
918 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
919 bytes.HasPrefix(uri, []byte("data:image/webp")) {
920 // should be safe
921 return string(uri), nil
922 } else {
923 // unsafe data
924 return "", nil
925 }
[1]926 }
927
[57]928 // parse the uri
[60]929 u, err := url.Parse(string(uri))
[1]930 if err != nil {
931 return "", err
932 }
[57]933
934 // get the fragment (with the prefix "#")
935 fragment := ""
936 if len(u.Fragment) > 0 {
937 fragment = "#" + u.Fragment
938 }
939
940 // reset the fragment: it is not included in the mortyurl
941 u.Fragment = ""
942
943 // merge the URI with the document URI
[28]944 u = mergeURIs(rc.BaseURL, u)
[1]945
[57]946 // simple internal link ?
947 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
948 if u.Scheme == rc.BaseURL.Scheme &&
949 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
950 u.Host == rc.BaseURL.Host &&
951 u.Path == rc.BaseURL.Path &&
952 u.RawQuery == rc.BaseURL.RawQuery {
953 // the fragment is the only difference between the document URI and the uri parameter
954 return fragment, nil
955 }
956
957 // return full URI and fragment (if not empty)
[60]958 morty_uri := u.String()
[1]959
960 if rc.Key == nil {
[60]961 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]962 }
[60]963 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]964}
965
// inArray reports whether b is byte-for-byte equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
974
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
980
981func verifyRequestURI(uri, hashMsg, key []byte) bool {
982 h := make([]byte, hex.DecodedLen(len(hashMsg)))
983 _, err := hex.Decode(h, hashMsg)
984 if err != nil {
[97]985 if DEBUG {
986 log.Println("hmac error:", err)
987 }
[1]988 return false
989 }
990 mac := hmac.New(sha256.New, key)
991 mac.Write(uri)
992 return hmac.Equal(h, mac.Sum(nil))
993}
994
[69]995func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
996 ctx.SetContentType("text/html")
997 ctx.SetStatusCode(403)
998 ctx.Write([]byte(MORTY_HTML_PAGE_START))
999 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
1000 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1001 ctx.Write([]byte(html.EscapeString(uri.String())))
1002 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1003 ctx.Write([]byte(html.EscapeString(uri.String())))
1004 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1005 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1006}
1007
[35]1008func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]1009 ctx.SetContentType("text/html; charset=UTF-8")
[35]1010 ctx.SetStatusCode(statusCode)
[69]1011 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]1012 if err != nil {
[97]1013 if DEBUG {
1014 log.Println("error:", err)
1015 }
[11]1016 ctx.Write([]byte("<h2>Error: "))
1017 ctx.Write([]byte(html.EscapeString(err.Error())))
1018 ctx.Write([]byte("</h2>"))
1019 }
[1]1020 if p.Key == nil {
1021 ctx.Write([]byte(`
[36]1022 <form action="post">
1023 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1024 <input type="submit" value="go" />
1025 </form>`))
[11]1026 } else {
1027 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]1028 }
[69]1029 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]1030}
1031
1032func main() {
[78]1033 default_listen_addr := os.Getenv("MORTY_ADDRESS")
1034 if default_listen_addr == "" {
1035 default_listen_addr = "127.0.0.1:3000"
1036 }
1037 default_key := os.Getenv("MORTY_KEY")
1038 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]1039 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]1040 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]1041 version := flag.Bool("version", false, "Show version")
[4]1042 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[109]1043 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
[1]1044 flag.Parse()
1045
[74]1046 if *version {
1047 fmt.Println(VERSION)
1048 return
1049 }
1050
[24]1051 if *ipv6 {
[109]1052 CLIENT.DialDualStack = true
[24]1053 }
1054
[109]1055 if *socks5 != "" {
1056 // this disables CLIENT.DialDualStack
1057 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1058 }
1059
[4]1060 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1061
1062 if *key != "" {
[92]1063 var err error
1064 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1065 if err != nil {
1066 log.Fatal("Error parsing -key", err.Error())
1067 os.Exit(1)
[92]1068 }
[1]1069 }
1070
1071 log.Println("listening on", *listen)
1072
1073 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1074 log.Fatal("Error in ListenAndServe:", err)
1075 }
1076}
Note: See TracBrowser for help on using the repository browser.