source: code/trunk/morty.go@ 141

Last change on this file since 141 was 141, checked in by Izuru Yakumo, 2 years ago

Shoot I ate a zero

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 32.6 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
[121]12 "html/template"
[1]13 "io"
14 "log"
[68]15 "mime"
[1]16 "net/url"
[78]17 "os"
[68]18 "path/filepath"
[1]19 "regexp"
20 "strings"
[4]21 "time"
[60]22 "unicode/utf8"
[1]23
24 "github.com/valyala/fasthttp"
[109]25 "github.com/valyala/fasthttp/fasthttpproxy"
[1]26 "golang.org/x/net/html"
[45]27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
[68]29
[127]30 "github.com/asciimoo/morty/config"
[68]31 "github.com/asciimoo/morty/contenttype"
[1]32)
33
// Text-handling states used by sanitizeHTML to decide how a text token is
// written to the output.
const (
	// STATE_DEFAULT copies text tokens to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE routes text tokens through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT routes text tokens through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
39
const (
	// VERSION is the release identifier of this build.
	VERSION = "v0.2.1"

	// MAX_REDIRECT_COUNT bounds how many redirects ProcessUri follows before
	// giving up on a request.
	MAX_REDIRECT_COUNT = 5
)
43
[1]44var CLIENT *fasthttp.Client = &fasthttp.Client{
45 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
[113]46 ReadBufferSize: 16 * 1024, // 16K
[1]47}
48
// cfg is the active configuration; it starts out as config.DefaultConfig.
// The handlers in this file read Debug, UrlParameter and HashParameter from it.
var cfg *config.Config = config.DefaultConfig
[1]50
// ALLOWED_CONTENTTYPE_FILTER matches the content types that ProcessUri serves
// directly (HTML, CSS, images and fonts). Responses that do not match are
// checked against ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER instead.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	contenttype.NewFilterEquals("image", "svg", "xml"),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
75
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER matches content types that ProcessUri
// forwards only with a forced "attachment" Content-Disposition. Anything that
// matches neither this filter nor ALLOWED_CONTENTTYPE_FILTER is rejected.
var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// texts
	contenttype.NewFilterEquals("text", "csv", ""),
	contenttype.NewFilterEquals("text", "tab-separated-values", ""),
	contenttype.NewFilterEquals("text", "plain", ""),
	// API
	contenttype.NewFilterEquals("application", "json", ""),
	// Documents
	contenttype.NewFilterEquals("application", "x-latex", ""),
	contenttype.NewFilterEquals("application", "pdf", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
	// Compressed archives
	contenttype.NewFilterEquals("application", "zip", ""),
	contenttype.NewFilterEquals("application", "gzip", ""),
	contenttype.NewFilterEquals("application", "x-compressed", ""),
	contenttype.NewFilterEquals("application", "x-gtar", ""),
	contenttype.NewFilterEquals("application", "x-compress", ""),
	// Generic binary
	contenttype.NewFilterEquals("application", "octet-stream", ""),
})
99
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters kept when
// ProcessUri filters the upstream content type before echoing it.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
103
// UNSAFE_ELEMENTS names the elements that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	// "iframe" is intentionally not listed here.
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
113
// SAFE_ATTRIBUTES names the attributes that sanitizeAttr copies to the output
// with their HTML-escaped value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
150
// LINK_REL_SAFE_VALUES lists the rel values accepted by sanitizeLinkTag; a
// <link> whose rel attribute is not one of these is removed.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
172
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values that
// sanitizeMetaTag lets through; any other <meta http-equiv> is removed.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped here.
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // the target URL is rewritten by sanitizeMetaTag
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
181
// CSS_URL_REGEXP finds url(...) references in CSS. Submatch 2 is the URL
// itself; submatches 1 and 3 are the optional opening and closing quotes.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
183
// Proxy carries the per-instance settings of the request handlers.
type Proxy struct {
	// Key is the HMAC key used to verify request URLs; nil disables verification.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream request.
	RequestTimeout time.Duration
	// FollowRedirect makes GET requests follow upstream redirects internally.
	FollowRedirect bool
}
189
// RequestConfig is the per-document state shared by the sanitizers.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil produces unsigned URLs.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	BaseURL *url.URL
	// BodyInjected is set once the body extension has been written before </body>.
	BodyInjected bool
}
195
// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
// Field order matters: callers build it with positional literals.
type HTMLBodyExtParam struct {
	// BaseURL is the URL of the proxied document.
	BaseURL string
	// HasMortyKey makes the URL input of the header read-only.
	HasMortyKey bool
	// URLParamName is the name of the URL request parameter.
	URLParamName string
}
[1]201
// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template.
// Field order matters: callers build it with positional literals.
type HTMLFormExtParam struct {
	// BaseURL is the resolved target URL of the form.
	BaseURL string
	// MortyHash is the HMAC of BaseURL, or empty when no key is configured.
	MortyHash string
	// URLParamName is the name of the URL request parameter.
	URLParamName string
	// HashParamName is the name of the hash request parameter.
	HashParamName string
}
// HTMLMainPageFormParam is the data passed to the HTML_MAIN_PAGE_FORM template.
type HTMLMainPageFormParam struct {
	// URLParamName is the name of the URL request parameter.
	URLParamName string
}
[1]211
// Templates parsed once in init().
var (
	// HTML_FORM_EXTENSION renders the hidden inputs injected into every <form>.
	HTML_FORM_EXTENSION *template.Template
	// HTML_BODY_EXTENSION renders the proxy header injected into the document body.
	HTML_BODY_EXTENSION *template.Template
	// HTML_MAIN_PAGE_FORM renders the URL entry form of the main page.
	HTML_MAIN_PAGE_FORM *template.Template
)
// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag: it
// declares the UTF-8 charset and disables the referrer.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]219
// MORTY_HTML_PAGE_START is the opening part of every page generated by the
// proxy itself (main page, error pages, exit page). It leaves the
// "container" <div> open; MORTY_HTML_PAGE_END closes it.
//
// The link rule used to read "a { text-decoration: none; #2980b9; }", which
// is an invalid declaration; the colour is now set with the "color" property.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
242
// MORTY_HTML_PAGE_END closes the "container" <div> opened by
// MORTY_HTML_PAGE_START, appends the footer and ends the document.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
252
// FAVICON_BYTES holds the decoded favicon image; it is filled in by init()
// and served by appRequestHandler for /favicon.ico with type image/png.
var FAVICON_BYTES []byte
254
255func init() {
256 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
257
258 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
[121]259 var err error
260 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
[123]261 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
[121]262 if err != nil {
263 panic(err)
264 }
265 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
266<input type="checkbox" id="mortytoggle" autocomplete="off" />
267<div id="mortyheader">
268 <form method="get">
269 <label for="mortytoggle">hide</label>
270 <span><a href="/">Morty Proxy</a></span>
[140]271 <input type="url" value="{{.BaseURL}}" name="{{.URLParamName}}" {{if .HasMortyKey }}readonly="true"{{end}} />
[121]272 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
273 </form>
274</div>
275<style>
276body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
277#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
[126]278#mortyheader * { padding: 0; margin: 0; }
[121]279#mortyheader p { padding: 0 0 0.7em 0; display: block; }
280#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
281#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
282#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
283input[type=checkbox]#mortytoggle { display: none; }
284input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
285#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
286</style>
287`)
288 if err != nil {
289 panic(err)
290 }
[140]291 HTML_MAIN_PAGE_FORM, err = template.New("html_main_page_form").Parse(`
292 <form action="post">
293 Visit url: <input placeholder="https://url.." name="{{.URLParamName}}" autofocus />
294 <input type="submit" value="go" />
295 </form>`)
296 if err != nil {
297 panic(err)
298 }
[67]299}
300
[1]301func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]302
303 if appRequestHandler(ctx) {
304 return
305 }
306
[140]307 requestHash := popRequestParam(ctx, []byte(cfg.HashParameter))
[1]308
[140]309 requestURI := popRequestParam(ctx, []byte(cfg.UrlParameter))
[1]310
311 if requestURI == nil {
[35]312 p.serveMainPage(ctx, 200, nil)
[1]313 return
314 }
315
316 if p.Key != nil {
317 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]318 // HTTP status code 403 : Forbidden
[140]319 error_message := fmt.Sprintf(`invalid "%s" parameter. hint: Hash URL Parameter`, cfg.HashParameter)
320 p.serveMainPage(ctx, 403, errors.New(error_message))
[1]321 return
322 }
323 }
324
[118]325 requestURIQuery := ctx.QueryArgs().QueryString()
326 if len(requestURIQuery) > 0 {
[125]327 if bytes.ContainsRune(requestURI, '?') {
328 requestURI = append(requestURI, '&')
329 } else {
330 requestURI = append(requestURI, '?')
331 }
[118]332 requestURI = append(requestURI, requestURIQuery...)
333 }
334
[131]335 p.ProcessUri(ctx, string(requestURI), 0)
336}
[1]337
[131]338func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURIStr string, redirectCount int) {
339 parsedURI, err := url.Parse(requestURIStr)
340
[11]341 if err != nil {
[35]342 // HTTP status code 500 : Internal Server Error
343 p.serveMainPage(ctx, 500, err)
[1]344 return
345 }
346
[120]347 if parsedURI.Scheme == "" {
[131]348 requestURIStr = "https://" + requestURIStr
349 parsedURI, err = url.Parse(requestURIStr)
[128]350 if err != nil {
351 p.serveMainPage(ctx, 500, err)
352 return
353 }
[120]354 }
355
[69]356 // Serve an intermediate page for protocols other than HTTP(S)
357 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
358 p.serveExitMortyPage(ctx, parsedURI)
359 return
360 }
361
[1]362 req := fasthttp.AcquireRequest()
363 defer fasthttp.ReleaseRequest(req)
[12]364 req.SetConnectionClose()
[1]365
[127]366 if cfg.Debug {
[129]367 log.Println(string(ctx.Method()), requestURIStr)
[97]368 }
[1]369
[47]370 req.SetRequestURI(requestURIStr)
[141]371 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"))
[1]372
373 resp := fasthttp.AcquireResponse()
374 defer fasthttp.ReleaseResponse(resp)
375
376 req.Header.SetMethodBytes(ctx.Method())
377 if ctx.IsPost() || ctx.IsPut() {
378 req.SetBody(ctx.PostBody())
379 }
380
[11]381 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
382
383 if err != nil {
[35]384 if err == fasthttp.ErrTimeout {
385 // HTTP status code 504 : Gateway Time-Out
386 p.serveMainPage(ctx, 504, err)
387 } else {
388 // HTTP status code 500 : Internal Server Error
389 p.serveMainPage(ctx, 500, err)
390 }
[1]391 return
392 }
393
394 if resp.StatusCode() != 200 {
395 switch resp.StatusCode() {
[7]396 case 301, 302, 303, 307, 308:
[1]397 loc := resp.Header.Peek("Location")
398 if loc != nil {
[131]399 if p.FollowRedirect && ctx.IsGet() {
400 // GET method: Morty follows the redirect
401 if redirectCount < MAX_REDIRECT_COUNT {
402 if cfg.Debug {
403 log.Println("follow redirect to", string(loc))
404 }
405 p.ProcessUri(ctx, string(loc), redirectCount+1)
406 } else {
407 p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
[96]408 }
[1]409 return
[131]410 } else {
411 // Other HTTP methods: Morty does NOT follow the redirect
412 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
413 url, err := rc.ProxifyURI(loc)
414 if err == nil {
415 ctx.SetStatusCode(resp.StatusCode())
416 ctx.Response.Header.Add("Location", url)
417 if cfg.Debug {
418 log.Println("redirect to", string(loc))
419 }
420 return
421 }
[1]422 }
423 }
424 }
[47]425 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]426 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]427 return
428 }
429
[68]430 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]431
[68]432 if contentTypeBytes == nil {
[35]433 // HTTP status code 503 : Service Unavailable
434 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]435 return
436 }
437
[68]438 contentTypeString := string(contentTypeBytes)
439
440 // decode Content-Type header
441 contentType, error := contenttype.ParseContentType(contentTypeString)
442 if error != nil {
443 // HTTP status code 503 : Service Unavailable
444 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]445 return
446 }
447
[68]448 // content-disposition
449 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]450
[68]451 // check content type
452 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
453 // it is not a usual content type
454 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
455 // force attachment for allowed content type
456 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
457 } else {
458 // deny access to forbidden content type
459 // HTTP status code 403 : Forbidden
[129]460 p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
[68]461 return
462 }
463 }
464
465 // HACK : replace */xhtml by text/html
466 if contentType.SubType == "xhtml" {
467 contentType.TopLevelType = "text"
468 contentType.SubType = "html"
469 contentType.Suffix = ""
470 }
471
472 // conversion to UTF-8
[1]473 var responseBody []byte
474
[68]475 if contentType.TopLevelType == "text" {
476 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]477 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
478 responseBody, err = e.NewDecoder().Bytes(resp.Body())
479 if err != nil {
480 // HTTP status code 503 : Service Unavailable
481 p.serveMainPage(ctx, 503, err)
482 return
483 }
484 } else {
485 responseBody = resp.Body()
[1]486 }
[68]487 // update the charset or specify it
488 contentType.Parameters["charset"] = "UTF-8"
[1]489 } else {
490 responseBody = resp.Body()
491 }
492
[68]493 //
494 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]495
[68]496 // set the content type
497 ctx.SetContentType(contentType.String())
498
499 // output according to MIME type
[1]500 switch {
[68]501 case contentType.SubType == "css" && contentType.Suffix == "":
[23]502 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]503 case contentType.SubType == "html" && contentType.Suffix == "":
[124]504 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
505 sanitizeHTML(rc, ctx, responseBody)
506 if !rc.BodyInjected {
[140]507 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
[124]508 if len(rc.Key) > 0 {
509 p.HasMortyKey = true
510 }
511 err := HTML_BODY_EXTENSION.Execute(ctx, p)
512 if err != nil {
[127]513 if cfg.Debug {
[124]514 fmt.Println("failed to inject body extension", err)
515 }
516 }
517 }
[1]518 default:
[68]519 if contentDispositionBytes != nil {
520 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]521 }
[1]522 ctx.Write(responseBody)
523 }
524}
525
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the upstream header value (may be nil). Its
// parameters are kept when it parses; otherwise they are discarded. If no
// "filename" parameter is present, one is derived from the last element of
// url.Path.
//
// Fixes compared to the previous revision:
//   - The derived name is stored under "filename", the same key that is looked
//     up (it used to be stored as "fileName").
//   - No filename is emitted when the path has no usable last element (empty
//     path or "/"), instead of sending "." or "/" as the file name.
//   - If the parameters cannot be formatted, plain "attachment" is returned
//     rather than an empty header value.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	contentDispositionParams := make(map[string]string)

	if contentDispositionBytes != nil {
		_, parsedParams, err := mime.ParseMediaType(string(contentDispositionBytes))
		if err == nil && parsedParams != nil {
			contentDispositionParams = parsedParams
		}
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	// FormatMediaType returns "" when a parameter violates the standard.
	formatted := mime.FormatMediaType("attachment", contentDispositionParams)
	if formatted == "" {
		formatted = "attachment"
	}
	return []byte(formatted)
}
548
[10]549func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]550 // serve robots.txt
[10]551 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
552 ctx.SetContentType("text/plain")
553 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
554 return true
555 }
[11]556
[67]557 // server favicon.ico
558 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
559 ctx.SetContentType("image/png")
560 ctx.Write(FAVICON_BYTES)
561 return true
562 }
563
[10]564 return false
565}
566
[1]567func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
568 param := ctx.QueryArgs().PeekBytes(paramName)
569
570 if param == nil {
571 param = ctx.PostArgs().PeekBytes(paramName)
[121]572 ctx.PostArgs().DelBytes(paramName)
[1]573 }
[121]574 ctx.QueryArgs().DelBytes(paramName)
[1]575
576 return param
577}
578
[9]579func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]580 // TODO
581
582 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
583
584 if urlSlices == nil {
[9]585 out.Write(css)
[1]586 return
587 }
588
589 startIndex := 0
590
591 for _, s := range urlSlices {
[15]592 urlStart := s[4]
593 urlEnd := s[5]
[1]594
[60]595 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]596 out.Write(css[startIndex:urlStart])
597 out.Write([]byte(uri))
[1]598 startIndex = urlEnd
[127]599 } else if cfg.Debug {
[36]600 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]601 }
602 }
603 if startIndex < len(css) {
[9]604 out.Write(css[startIndex:len(css)])
[1]605 }
606}
607
[9]608func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]609 r := bytes.NewReader(htmlDoc)
610 decoder := html.NewTokenizer(r)
611 decoder.AllowCDATA(true)
612
613 unsafeElements := make([][]byte, 0, 8)
614 state := STATE_DEFAULT
615 for {
616 token := decoder.Next()
617 if token == html.ErrorToken {
618 err := decoder.Err()
619 if err != io.EOF {
[97]620 log.Println("failed to parse HTML")
[1]621 }
622 break
623 }
624
625 if len(unsafeElements) == 0 {
626
627 switch token {
628 case html.StartTagToken, html.SelfClosingTagToken:
629 tag, hasAttrs := decoder.TagName()
630 safe := !inArray(tag, UNSAFE_ELEMENTS)
631 if !safe {
[116]632 if token != html.SelfClosingTagToken {
[1]633 var unsafeTag []byte = make([]byte, len(tag))
634 copy(unsafeTag, tag)
635 unsafeElements = append(unsafeElements, unsafeTag)
636 }
637 break
638 }
[38]639 if bytes.Equal(tag, []byte("base")) {
640 for {
641 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]642 if bytes.Equal(attrName, []byte("href")) {
643 parsedURI, err := url.Parse(string(attrValue))
644 if err == nil {
645 rc.BaseURL = parsedURI
646 }
[38]647 }
648 if !moreAttr {
649 break
650 }
651 }
652 break
653 }
[1]654 if bytes.Equal(tag, []byte("noscript")) {
655 state = STATE_IN_NOSCRIPT
656 break
657 }
658 var attrs [][][]byte
659 if hasAttrs {
660 for {
661 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]662 attrs = append(attrs, [][]byte{
663 attrName,
664 attrValue,
665 []byte(html.EscapeString(string(attrValue))),
666 })
[1]667 if !moreAttr {
668 break
669 }
670 }
[13]671 }
672 if bytes.Equal(tag, []byte("link")) {
673 sanitizeLinkTag(rc, out, attrs)
674 break
675 }
676
[45]677 if bytes.Equal(tag, []byte("meta")) {
678 sanitizeMetaTag(rc, out, attrs)
679 break
680 }
681
[13]682 fmt.Fprintf(out, "<%s", tag)
683
684 if hasAttrs {
[45]685 sanitizeAttrs(rc, out, attrs)
[1]686 }
[13]687
[1]688 if token == html.SelfClosingTagToken {
[9]689 fmt.Fprintf(out, " />")
[1]690 } else {
[9]691 fmt.Fprintf(out, ">")
[1]692 if bytes.Equal(tag, []byte("style")) {
693 state = STATE_IN_STYLE
694 }
695 }
[13]696
[45]697 if bytes.Equal(tag, []byte("head")) {
[46]698 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]699 }
700
[1]701 if bytes.Equal(tag, []byte("form")) {
702 var formURL *url.URL
703 for _, attr := range attrs {
704 if bytes.Equal(attr[0], []byte("action")) {
705 formURL, _ = url.Parse(string(attr[1]))
[28]706 formURL = mergeURIs(rc.BaseURL, formURL)
[1]707 break
708 }
709 }
710 if formURL == nil {
[23]711 formURL = rc.BaseURL
[1]712 }
[2]713 urlStr := formURL.String()
714 var key string
715 if rc.Key != nil {
716 key = hash(urlStr, rc.Key)
717 }
[140]718 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key, cfg.UrlParameter, cfg.HashParameter})
[121]719 if err != nil {
[127]720 if cfg.Debug {
[121]721 fmt.Println("failed to inject body extension", err)
722 }
723 }
[1]724 }
725
726 case html.EndTagToken:
727 tag, _ := decoder.TagName()
728 writeEndTag := true
729 switch string(tag) {
730 case "body":
[140]731 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
[121]732 if len(rc.Key) > 0 {
733 p.HasMortyKey = true
734 }
735 err := HTML_BODY_EXTENSION.Execute(out, p)
736 if err != nil {
[127]737 if cfg.Debug {
[121]738 fmt.Println("failed to inject body extension", err)
739 }
740 }
[124]741 rc.BodyInjected = true
[1]742 case "style":
743 state = STATE_DEFAULT
744 case "noscript":
745 state = STATE_DEFAULT
746 writeEndTag = false
747 }
748 // skip noscript tags - only the tag, not the content, because javascript is sanitized
749 if writeEndTag {
[9]750 fmt.Fprintf(out, "</%s>", tag)
[1]751 }
752
753 case html.TextToken:
754 switch state {
755 case STATE_DEFAULT:
[9]756 fmt.Fprintf(out, "%s", decoder.Raw())
[1]757 case STATE_IN_STYLE:
[9]758 sanitizeCSS(rc, out, decoder.Raw())
[1]759 case STATE_IN_NOSCRIPT:
[9]760 sanitizeHTML(rc, out, decoder.Raw())
[1]761 }
762
[62]763 case html.CommentToken:
764 // ignore comment. TODO : parse IE conditional comment
765
766 case html.DoctypeToken:
[9]767 out.Write(decoder.Raw())
[1]768 }
769 } else {
770 switch token {
[116]771 case html.StartTagToken, html.SelfClosingTagToken:
[1]772 tag, _ := decoder.TagName()
773 if inArray(tag, UNSAFE_ELEMENTS) {
774 unsafeElements = append(unsafeElements, tag)
775 }
776
777 case html.EndTagToken:
778 tag, _ := decoder.TagName()
779 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
780 unsafeElements = unsafeElements[:len(unsafeElements)-1]
781 }
782 }
783 }
784 }
785}
786
[13]787func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
788 exclude := false
789 for _, attr := range attrs {
790 attrName := attr[0]
791 attrValue := attr[1]
792 if bytes.Equal(attrName, []byte("rel")) {
[46]793 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]794 exclude = true
795 break
796 }
797 }
798 if bytes.Equal(attrName, []byte("as")) {
799 if bytes.Equal(attrValue, []byte("script")) {
800 exclude = true
801 break
802 }
803 }
804 }
805
806 if !exclude {
807 out.Write([]byte("<link"))
808 for _, attr := range attrs {
[21]809 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]810 }
811 out.Write([]byte(">"))
812 }
813}
814
[45]815func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]816 var http_equiv []byte
817 var content []byte
818
819 for _, attr := range attrs {
820 attrName := attr[0]
821 attrValue := attr[1]
822 if bytes.Equal(attrName, []byte("http-equiv")) {
823 http_equiv = bytes.ToLower(attrValue)
[46]824 // exclude some <meta http-equiv="..." ..>
825 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
826 return
827 }
[1]828 }
829 if bytes.Equal(attrName, []byte("content")) {
830 content = attrValue
831 }
[45]832 if bytes.Equal(attrName, []byte("charset")) {
833 // exclude <meta charset="...">
834 return
835 }
[1]836 }
837
[45]838 out.Write([]byte("<meta"))
[14]839 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
840 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
841 contentUrl := content[urlIndex+4:]
[36]842 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]843 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]844 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]845 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]846 }
847 }
848 // output proxify result
[60]849 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]850 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]851 }
852 } else {
[46]853 if len(http_equiv) > 0 {
854 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
855 }
[9]856 sanitizeAttrs(rc, out, attrs)
[1]857 }
[45]858 out.Write([]byte(">"))
[1]859}
860
[9]861func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]862 for _, attr := range attrs {
[21]863 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]864 }
865}
866
[21]867func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]868 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]869 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]870 return
871 }
872 switch string(attrName) {
873 case "src", "href", "action":
[60]874 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]875 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[127]876 } else if cfg.Debug {
[36]877 log.Println("cannot proxify uri:", string(attrValue))
[1]878 }
879 case "style":
[21]880 cssAttr := bytes.NewBuffer(nil)
881 sanitizeCSS(rc, cssAttr, attrValue)
882 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]883 }
884}
885
// mergeURIs resolves u2 as a reference relative to u1. A nil u2 yields u1
// itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
892
// sanitizeURI normalizes the start and end of uri and extracts its scheme.
//
// Trailing bytes <= 32 are trimmed. The bytes up to and including the first
// ':' are scanned: bytes <= 32 are skipped and ASCII upper-case letters are
// lower-cased. If a ':' is reached before any of '/', '?', '\\' or '#', the
// cleaned scheme (including the ':') is written back into uri right in front
// of the remainder, and that sub-slice is returned together with the scheme.
// Otherwise the URI is treated as relative: it is returned from its first
// byte > 32 and the scheme is "".
//
// The returned slice shares memory with uri, and uri is modified in place
// when a scheme is found.
func sanitizeURI(uri []byte) ([]byte, string) {
	const controlAndSpace = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"

	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, controlAndSpace)

	var (
		cleaned = make([]byte, 0, 10) // scanned bytes without the skipped ones
		start   = 0                   // index of the first byte > 32
		colonAt = -1                  // index of the scheme's ':' in uri, if any
	)

scan:
	for i, c := range uri {
		if c <= 32 {
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if len(cleaned) == 0 {
			start = i
		}
		cleaned = append(cleaned, c)

		switch c {
		case ':':
			// end of the scheme
			colonAt = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonAt < 0 {
		return uri[start:], ""
	}

	// Place the cleaned scheme so that it ends exactly at the original ':'.
	begin := colonAt - len(cleaned) + 1
	copy(uri[begin:], cleaned)
	return uri[begin:], string(cleaned)
}
944
945func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
946 // sanitize URI
947 uri, scheme := sanitizeURI(uri)
948
[28]949 // remove javascript protocol
[60]950 if scheme == "javascript:" {
[28]951 return "", nil
952 }
[57]953
[1]954 // TODO check malicious data: - e.g. data:script
[60]955 if scheme == "data:" {
[61]956 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
957 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
958 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
959 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
960 bytes.HasPrefix(uri, []byte("data:image/webp")) {
961 // should be safe
962 return string(uri), nil
963 } else {
964 // unsafe data
965 return "", nil
966 }
[1]967 }
968
[57]969 // parse the uri
[60]970 u, err := url.Parse(string(uri))
[1]971 if err != nil {
972 return "", err
973 }
[57]974
975 // get the fragment (with the prefix "#")
976 fragment := ""
977 if len(u.Fragment) > 0 {
978 fragment = "#" + u.Fragment
979 }
980
981 // reset the fragment: it is not included in the mortyurl
982 u.Fragment = ""
983
984 // merge the URI with the document URI
[28]985 u = mergeURIs(rc.BaseURL, u)
[1]986
[57]987 // simple internal link ?
988 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
989 if u.Scheme == rc.BaseURL.Scheme &&
990 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
991 u.Host == rc.BaseURL.Host &&
992 u.Path == rc.BaseURL.Path &&
993 u.RawQuery == rc.BaseURL.RawQuery {
994 // the fragment is the only difference between the document URI and the uri parameter
995 return fragment, nil
996 }
997
998 // return full URI and fragment (if not empty)
[60]999 morty_uri := u.String()
[1]1000
1001 if rc.Key == nil {
[140]1002 return fmt.Sprintf("./?%s=%s%s", cfg.UrlParameter, url.QueryEscape(morty_uri), fragment), nil
[1]1003 }
[140]1004 return fmt.Sprintf("./?%s=%s&%s=%s%s", cfg.HashParameter, hash(morty_uri, rc.Key), cfg.UrlParameter, url.QueryEscape(morty_uri), fragment), nil
[1]1005}
1006
// inArray reports whether a contains an element byte-wise equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
1015
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
1021
1022func verifyRequestURI(uri, hashMsg, key []byte) bool {
1023 h := make([]byte, hex.DecodedLen(len(hashMsg)))
1024 _, err := hex.Decode(h, hashMsg)
1025 if err != nil {
[127]1026 if cfg.Debug {
[97]1027 log.Println("hmac error:", err)
1028 }
[1]1029 return false
1030 }
1031 mac := hmac.New(sha256.New, key)
1032 mac.Write(uri)
1033 return hmac.Equal(h, mac.Sum(nil))
1034}
1035
[69]1036func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
1037 ctx.SetContentType("text/html")
1038 ctx.SetStatusCode(403)
1039 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1040 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
1041 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1042 ctx.Write([]byte(html.EscapeString(uri.String())))
1043 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1044 ctx.Write([]byte(html.EscapeString(uri.String())))
1045 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1046 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1047}
1048
[35]1049func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]1050 ctx.SetContentType("text/html; charset=UTF-8")
[35]1051 ctx.SetStatusCode(statusCode)
[69]1052 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]1053 if err != nil {
[127]1054 if cfg.Debug {
[97]1055 log.Println("error:", err)
1056 }
[11]1057 ctx.Write([]byte("<h2>Error: "))
1058 ctx.Write([]byte(html.EscapeString(err.Error())))
1059 ctx.Write([]byte("</h2>"))
1060 }
[1]1061 if p.Key == nil {
[140]1062 p := HTMLMainPageFormParam{cfg.UrlParameter}
1063 err := HTML_MAIN_PAGE_FORM.Execute(ctx, p)
1064 if err != nil {
1065 if cfg.Debug {
1066 fmt.Println("failed to inject main page form", err)
1067 }
1068 }
[11]1069 } else {
1070 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]1071 }
[69]1072 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]1073}
1074
1075func main() {
[137]1076 listenAddress := flag.String("listen", cfg.ListenAddress, "Listen address")
1077 key := flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1078 IPV6 := flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1079 debug := flag.Bool("debug", cfg.Debug, "Debug mode")
1080 requestTimeout := flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
1081 followRedirect := flag.Bool("followredirect", cfg.FollowRedirect, "Follow HTTP GET redirect")
[134]1082 proxyenv := flag.Bool("proxyenv", false, "Use a HTTP proxy as set in the environment (HTTP_PROXY, HTTPS_PROXY and NO_PROXY). Overrides -proxy, -socks5, -ipv6.")
1083 proxy := flag.String("proxy", "", "Use the specified HTTP proxy (ie: '[user:pass@]hostname:port'). Overrides -socks5, -ipv6.")
1084 socks5 := flag.String("socks5", "", "Use a SOCKS5 proxy (ie: 'hostname:port'). Overrides -ipv6.")
[140]1085 urlParameter := flag.String("urlparam", cfg.UrlParameter, "user-defined requesting string URL parameter name (ie: '/?url=...' or '/?u=...')")
1086 hashParameter := flag.String("hashparam", cfg.HashParameter, "user-defined requesting string HASH parameter name (ie: '/?hash=...' or '/?h=...')")
[74]1087 version := flag.Bool("version", false, "Show version")
[1]1088 flag.Parse()
1089
[137]1090 cfg.ListenAddress = *listenAddress
1091 cfg.Key = *key
1092 cfg.IPV6 = *IPV6
1093 cfg.Debug = *debug
1094 cfg.RequestTimeout = *requestTimeout
1095 cfg.FollowRedirect = *followRedirect
[140]1096 cfg.UrlParameter = *urlParameter
1097 cfg.HashParameter = *hashParameter
[137]1098
[74]1099 if *version {
1100 fmt.Println(VERSION)
1101 return
1102 }
1103
[134]1104 if *proxyenv && os.Getenv("HTTP_PROXY") == "" && os.Getenv("HTTPS_PROXY") == "" {
1105 log.Fatal("Error -proxyenv is used but no environment variables named 'HTTP_PROXY' and/or 'HTTPS_PROXY' could be found.")
1106 os.Exit(1)
1107 }
1108
[135]1109 if *proxyenv {
[134]1110 CLIENT.Dial = fasthttpproxy.FasthttpProxyHTTPDialer()
1111 log.Println("Using environment defined proxy(ies).")
1112 } else if *proxy != "" {
1113 CLIENT.Dial = fasthttpproxy.FasthttpHTTPDialer(*proxy)
1114 log.Println("Using custom HTTP proxy.")
1115 } else if *socks5 != "" {
[109]1116 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
[134]1117 log.Println("Using Socks5 proxy.")
1118 } else if cfg.IPV6 {
[127]1119 CLIENT.Dial = fasthttp.DialDualStack
[134]1120 log.Println("Using dual stack (IPv4/IPv6) direct connections.")
1121 } else {
1122 CLIENT.Dial = fasthttp.Dial
1123 log.Println("Using IPv4 only direct connections.")
[127]1124 }
[109]1125
[131]1126 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second,
1127 FollowRedirect: cfg.FollowRedirect}
[1]1128
[127]1129 if cfg.Key != "" {
[92]1130 var err error
[127]1131 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
[94]1132 if err != nil {
1133 log.Fatal("Error parsing -key", err.Error())
1134 os.Exit(1)
[92]1135 }
[1]1136 }
1137
[127]1138 log.Println("listening on", cfg.ListenAddress)
[1]1139
[127]1140 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
[1]1141 log.Fatal("Error in ListenAndServe:", err)
1142 }
1143}
Note: See TracBrowser for help on using the repository browser.