[1] | 1 | package main
|
---|
| 2 |
|
---|
| 3 | import (
|
---|
| 4 | "bytes"
|
---|
| 5 | "crypto/hmac"
|
---|
| 6 | "crypto/sha256"
|
---|
| 7 | "encoding/hex"
|
---|
| 8 | "errors"
|
---|
| 9 | "flag"
|
---|
| 10 | "fmt"
|
---|
| 11 | "io"
|
---|
| 12 | "log"
|
---|
| 13 | "net/url"
|
---|
| 14 | "regexp"
|
---|
| 15 | "strings"
|
---|
[4] | 16 | "time"
|
---|
[60] | 17 | "unicode/utf8"
|
---|
[1] | 18 |
|
---|
| 19 | "github.com/valyala/fasthttp"
|
---|
| 20 | "golang.org/x/net/html"
|
---|
[45] | 21 | "golang.org/x/net/html/charset"
|
---|
| 22 | "golang.org/x/text/encoding"
|
---|
[1] | 23 | )
|
---|
| 24 |
|
---|
// Parser states tracked by sanitizeHTML while it walks a document.
const (
	STATE_DEFAULT     int = iota // ordinary markup and text
	STATE_IN_STYLE               // between <style> and </style>
	STATE_IN_NOSCRIPT            // between <noscript> and </noscript>
)
| 30 |
|
---|
| 31 | var CLIENT *fasthttp.Client = &fasthttp.Client{
|
---|
| 32 | MaxResponseBodySize: 10 * 1024 * 1024, // 10M
|
---|
| 33 | }
|
---|
| 34 |
|
---|
// CSS_URL_REGEXP finds url(...) references in CSS text.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself
// (tab or printable ASCII, excluding space, quotes and ')'), and submatch 3
// the optional closing quote. The closing parenthesis is optional.
var CSS_URL_REGEXP = regexp.MustCompile(`url\((['"]?)[ \t\f]*([\t!#-&(*-~]+)(['"]?)\)?`)
[1] | 36 |
|
---|
// Lookup tables consulted by the sanitizer through inArray.
var (
	// UNSAFE_ELEMENTS lists the elements that are dropped together with
	// everything nested inside them.
	UNSAFE_ELEMENTS = [][]byte{
		[]byte("applet"),
		[]byte("canvas"),
		[]byte("embed"),
		//[]byte("iframe"),
		[]byte("math"),
		[]byte("script"),
		[]byte("svg"),
	}

	// SAFE_ATTRIBUTES lists the attributes that are copied to the output
	// with their (HTML-escaped) value and no further rewriting.
	SAFE_ATTRIBUTES = [][]byte{
		[]byte("abbr"),
		[]byte("accesskey"),
		[]byte("align"),
		[]byte("alt"),
		[]byte("as"),
		[]byte("autocomplete"),
		[]byte("charset"),
		[]byte("checked"),
		[]byte("class"),
		[]byte("content"),
		[]byte("contenteditable"),
		[]byte("contextmenu"),
		[]byte("dir"),
		[]byte("for"),
		[]byte("height"),
		[]byte("hidden"),
		[]byte("hreflang"),
		[]byte("id"),
		[]byte("lang"),
		[]byte("media"),
		[]byte("method"),
		[]byte("name"),
		[]byte("nowrap"),
		[]byte("placeholder"),
		[]byte("property"),
		[]byte("rel"),
		[]byte("spellcheck"),
		[]byte("tabindex"),
		[]byte("target"),
		[]byte("title"),
		[]byte("translate"),
		[]byte("type"),
		[]byte("value"),
		[]byte("width"),
	}

	// SELF_CLOSING_ELEMENTS lists elements without a closing tag; an unsafe
	// element found in this list is not pushed on the skip stack.
	SELF_CLOSING_ELEMENTS = [][]byte{
		[]byte("area"),
		[]byte("base"),
		[]byte("br"),
		[]byte("col"),
		[]byte("embed"),
		[]byte("hr"),
		[]byte("img"),
		[]byte("input"),
		[]byte("keygen"),
		[]byte("link"),
		[]byte("meta"),
		[]byte("param"),
		[]byte("source"),
		[]byte("track"),
		[]byte("wbr"),
	}

	// LINK_REL_SAFE_VALUES lists the rel values a <link> may carry to be kept.
	LINK_REL_SAFE_VALUES = [][]byte{
		[]byte("alternate"),
		[]byte("archives"),
		[]byte("author"),
		[]byte("copyright"),
		[]byte("first"),
		[]byte("help"),
		[]byte("icon"),
		[]byte("index"),
		[]byte("last"),
		[]byte("license"),
		[]byte("manifest"),
		[]byte("next"),
		[]byte("pingback"),
		[]byte("prev"),
		[]byte("publisher"),
		[]byte("search"),
		[]byte("shortcut icon"),
		[]byte("stylesheet"),
		[]byte("up"),
	}

	// LINK_HTTP_EQUIV_SAFE_VALUES lists the http-equiv values a <meta> may
	// carry to be kept.
	LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
		// X-UA-Compatible is added automatically, so it can be skipped
		[]byte("date"),
		[]byte("last-modified"),
		[]byte("refresh"), // URL rewrite
		// []byte("location"), TODO URL rewrite
		[]byte("content-language"),
	}
)
| 132 |
|
---|
// Proxy holds the settings shared by every request served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" parameter;
	// verification is skipped when it is nil.
	Key []byte
	// RequestTimeout is the timeout passed to the client for each upstream request.
	RequestTimeout time.Duration
}
| 137 |
|
---|
// RequestConfig carries the per-document state needed to rewrite URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URIs; nil disables signing.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	// sanitizeHTML replaces it when the document contains <base href="...">.
	BaseURL *url.URL
}
| 142 |
|
---|
// HTML fragments injected into sanitized documents.
var (
	// HTML_FORM_EXTENSION is written right after every <form> start tag.
	// Format arguments: the form target URL and its hash.
	HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

	// HTML_BODY_EXTENSION is written just before </body>.
	// Format argument: the URL of the original page.
	HTML_BODY_EXTENSION = `
<div id="mortyheader">
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; padding: 12px 12px 12px 0; margin: 0; box-sizing: content-box; top: 15%%; left: 0; max-width: 140px; color: #444; overflow: hidden; z-index: 110000; font-size: 12px; line-height: normal; }
#mortyheader a { color: #3498db; font-weight: bold; }
#mortyheader p { padding: 0 0 0.7em 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 12px !important; font-family: sans !important; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`

	// HTML_HEAD_CONTENT_TYPE is written right after the <head> start tag.
	HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
`
)
[45] | 164 |
|
---|
[1] | 165 | func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
|
---|
[10] | 166 |
|
---|
| 167 | if appRequestHandler(ctx) {
|
---|
| 168 | return
|
---|
| 169 | }
|
---|
| 170 |
|
---|
[1] | 171 | requestHash := popRequestParam(ctx, []byte("mortyhash"))
|
---|
| 172 |
|
---|
| 173 | requestURI := popRequestParam(ctx, []byte("mortyurl"))
|
---|
| 174 |
|
---|
| 175 | if requestURI == nil {
|
---|
[35] | 176 | p.serveMainPage(ctx, 200, nil)
|
---|
[1] | 177 | return
|
---|
| 178 | }
|
---|
| 179 |
|
---|
| 180 | if p.Key != nil {
|
---|
| 181 | if !verifyRequestURI(requestURI, requestHash, p.Key) {
|
---|
[35] | 182 | // HTTP status code 403 : Forbidden
|
---|
| 183 | p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
|
---|
[1] | 184 | return
|
---|
| 185 | }
|
---|
| 186 | }
|
---|
| 187 |
|
---|
| 188 | parsedURI, err := url.Parse(string(requestURI))
|
---|
| 189 |
|
---|
[18] | 190 | if strings.HasSuffix(parsedURI.Host, ".onion") {
|
---|
[35] | 191 | // HTTP status code 501 : Not Implemented
|
---|
| 192 | p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
|
---|
[18] | 193 | return
|
---|
| 194 | }
|
---|
| 195 |
|
---|
[11] | 196 | if err != nil {
|
---|
[35] | 197 | // HTTP status code 500 : Internal Server Error
|
---|
| 198 | p.serveMainPage(ctx, 500, err)
|
---|
[1] | 199 | return
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 | req := fasthttp.AcquireRequest()
|
---|
| 203 | defer fasthttp.ReleaseRequest(req)
|
---|
[12] | 204 | req.SetConnectionClose()
|
---|
[1] | 205 |
|
---|
[47] | 206 | requestURIStr := string(requestURI)
|
---|
[1] | 207 |
|
---|
[47] | 208 | log.Println("getting", requestURIStr)
|
---|
[1] | 209 |
|
---|
[47] | 210 | req.SetRequestURI(requestURIStr)
|
---|
[60] | 211 | req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
|
---|
[1] | 212 |
|
---|
| 213 | resp := fasthttp.AcquireResponse()
|
---|
| 214 | defer fasthttp.ReleaseResponse(resp)
|
---|
| 215 |
|
---|
| 216 | req.Header.SetMethodBytes(ctx.Method())
|
---|
| 217 | if ctx.IsPost() || ctx.IsPut() {
|
---|
| 218 | req.SetBody(ctx.PostBody())
|
---|
| 219 | }
|
---|
| 220 |
|
---|
[11] | 221 | err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
|
---|
| 222 |
|
---|
| 223 | if err != nil {
|
---|
[35] | 224 | if err == fasthttp.ErrTimeout {
|
---|
| 225 | // HTTP status code 504 : Gateway Time-Out
|
---|
| 226 | p.serveMainPage(ctx, 504, err)
|
---|
| 227 | } else {
|
---|
| 228 | // HTTP status code 500 : Internal Server Error
|
---|
| 229 | p.serveMainPage(ctx, 500, err)
|
---|
| 230 | }
|
---|
[1] | 231 | return
|
---|
| 232 | }
|
---|
| 233 |
|
---|
| 234 | if resp.StatusCode() != 200 {
|
---|
| 235 | switch resp.StatusCode() {
|
---|
[7] | 236 | case 301, 302, 303, 307, 308:
|
---|
[1] | 237 | loc := resp.Header.Peek("Location")
|
---|
| 238 | if loc != nil {
|
---|
[23] | 239 | rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
|
---|
[60] | 240 | url, err := rc.ProxifyURI(loc)
|
---|
[1] | 241 | if err == nil {
|
---|
| 242 | ctx.SetStatusCode(resp.StatusCode())
|
---|
| 243 | ctx.Response.Header.Add("Location", url)
|
---|
| 244 | log.Println("redirect to", string(loc))
|
---|
| 245 | return
|
---|
| 246 | }
|
---|
| 247 | }
|
---|
| 248 | }
|
---|
[47] | 249 | error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
|
---|
[37] | 250 | p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
|
---|
[1] | 251 | return
|
---|
| 252 | }
|
---|
| 253 |
|
---|
| 254 | contentType := resp.Header.Peek("Content-Type")
|
---|
| 255 |
|
---|
| 256 | if contentType == nil {
|
---|
[35] | 257 | // HTTP status code 503 : Service Unavailable
|
---|
| 258 | p.serveMainPage(ctx, 503, errors.New("invalid content type"))
|
---|
[1] | 259 | return
|
---|
| 260 | }
|
---|
| 261 |
|
---|
[17] | 262 | if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
|
---|
[35] | 263 | // HTTP status code 403 : Forbidden
|
---|
| 264 | p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
|
---|
[17] | 265 | return
|
---|
| 266 | }
|
---|
| 267 |
|
---|
[1] | 268 | contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
|
---|
| 269 |
|
---|
| 270 | var responseBody []byte
|
---|
| 271 |
|
---|
[45] | 272 | if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
|
---|
| 273 | e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
|
---|
| 274 | if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
|
---|
| 275 | responseBody, err = e.NewDecoder().Bytes(resp.Body())
|
---|
| 276 | if err != nil {
|
---|
| 277 | // HTTP status code 503 : Service Unavailable
|
---|
| 278 | p.serveMainPage(ctx, 503, err)
|
---|
| 279 | return
|
---|
| 280 | }
|
---|
| 281 | } else {
|
---|
| 282 | responseBody = resp.Body()
|
---|
[1] | 283 | }
|
---|
| 284 | } else {
|
---|
| 285 | responseBody = resp.Body()
|
---|
| 286 | }
|
---|
| 287 |
|
---|
[52] | 288 | if bytes.Contains(contentType, []byte("xhtml")) {
|
---|
| 289 | ctx.SetContentType("text/html; charset=UTF-8")
|
---|
| 290 | } else {
|
---|
| 291 | ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
|
---|
| 292 | }
|
---|
[1] | 293 |
|
---|
| 294 | switch {
|
---|
| 295 | case bytes.Contains(contentType, []byte("css")):
|
---|
[23] | 296 | sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
|
---|
[1] | 297 | case bytes.Contains(contentType, []byte("html")):
|
---|
[23] | 298 | sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
|
---|
[1] | 299 | default:
|
---|
[39] | 300 | if ctx.Request.Header.Peek("Content-Disposition") != nil {
|
---|
| 301 | ctx.Response.Header.AddBytesV("Content-Disposition", ctx.Request.Header.Peek("Content-Disposition"))
|
---|
| 302 | }
|
---|
[1] | 303 | ctx.Write(responseBody)
|
---|
| 304 | }
|
---|
| 305 | }
|
---|
| 306 |
|
---|
[10] | 307 | func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
|
---|
[11] | 308 | // serve robots.txt
|
---|
[10] | 309 | if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
|
---|
| 310 | ctx.SetContentType("text/plain")
|
---|
| 311 | ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
|
---|
| 312 | return true
|
---|
| 313 | }
|
---|
[11] | 314 |
|
---|
[10] | 315 | return false
|
---|
| 316 | }
|
---|
| 317 |
|
---|
[1] | 318 | func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
|
---|
| 319 | param := ctx.QueryArgs().PeekBytes(paramName)
|
---|
| 320 |
|
---|
| 321 | if param == nil {
|
---|
| 322 | param = ctx.PostArgs().PeekBytes(paramName)
|
---|
| 323 | if param != nil {
|
---|
| 324 | ctx.PostArgs().DelBytes(paramName)
|
---|
| 325 | }
|
---|
| 326 | } else {
|
---|
| 327 | ctx.QueryArgs().DelBytes(paramName)
|
---|
| 328 | }
|
---|
| 329 |
|
---|
| 330 | return param
|
---|
| 331 | }
|
---|
| 332 |
|
---|
[9] | 333 | func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
|
---|
[1] | 334 | // TODO
|
---|
| 335 |
|
---|
| 336 | urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
|
---|
| 337 |
|
---|
| 338 | if urlSlices == nil {
|
---|
[9] | 339 | out.Write(css)
|
---|
[1] | 340 | return
|
---|
| 341 | }
|
---|
| 342 |
|
---|
| 343 | startIndex := 0
|
---|
| 344 |
|
---|
| 345 | for _, s := range urlSlices {
|
---|
[15] | 346 | urlStart := s[4]
|
---|
| 347 | urlEnd := s[5]
|
---|
[1] | 348 |
|
---|
[60] | 349 | if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
|
---|
[9] | 350 | out.Write(css[startIndex:urlStart])
|
---|
| 351 | out.Write([]byte(uri))
|
---|
[1] | 352 | startIndex = urlEnd
|
---|
| 353 | } else {
|
---|
[36] | 354 | log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
|
---|
[1] | 355 | }
|
---|
| 356 | }
|
---|
| 357 | if startIndex < len(css) {
|
---|
[9] | 358 | out.Write(css[startIndex:len(css)])
|
---|
[1] | 359 | }
|
---|
| 360 | }
|
---|
| 361 |
|
---|
[9] | 362 | func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
|
---|
[1] | 363 | r := bytes.NewReader(htmlDoc)
|
---|
| 364 | decoder := html.NewTokenizer(r)
|
---|
| 365 | decoder.AllowCDATA(true)
|
---|
| 366 |
|
---|
| 367 | unsafeElements := make([][]byte, 0, 8)
|
---|
| 368 | state := STATE_DEFAULT
|
---|
| 369 | for {
|
---|
| 370 | token := decoder.Next()
|
---|
| 371 | if token == html.ErrorToken {
|
---|
| 372 | err := decoder.Err()
|
---|
| 373 | if err != io.EOF {
|
---|
| 374 | log.Println("failed to parse HTML:")
|
---|
| 375 | }
|
---|
| 376 | break
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 | if len(unsafeElements) == 0 {
|
---|
| 380 |
|
---|
| 381 | switch token {
|
---|
| 382 | case html.StartTagToken, html.SelfClosingTagToken:
|
---|
| 383 | tag, hasAttrs := decoder.TagName()
|
---|
| 384 | safe := !inArray(tag, UNSAFE_ELEMENTS)
|
---|
| 385 | if !safe {
|
---|
| 386 | if !inArray(tag, SELF_CLOSING_ELEMENTS) {
|
---|
| 387 | var unsafeTag []byte = make([]byte, len(tag))
|
---|
| 388 | copy(unsafeTag, tag)
|
---|
| 389 | unsafeElements = append(unsafeElements, unsafeTag)
|
---|
| 390 | }
|
---|
| 391 | break
|
---|
| 392 | }
|
---|
[38] | 393 | if bytes.Equal(tag, []byte("base")) {
|
---|
| 394 | for {
|
---|
| 395 | attrName, attrValue, moreAttr := decoder.TagAttr()
|
---|
[45] | 396 | if bytes.Equal(attrName, []byte("href")) {
|
---|
| 397 | parsedURI, err := url.Parse(string(attrValue))
|
---|
| 398 | if err == nil {
|
---|
| 399 | rc.BaseURL = parsedURI
|
---|
| 400 | }
|
---|
[38] | 401 | }
|
---|
| 402 | if !moreAttr {
|
---|
| 403 | break
|
---|
| 404 | }
|
---|
| 405 | }
|
---|
| 406 | break
|
---|
| 407 | }
|
---|
[1] | 408 | if bytes.Equal(tag, []byte("noscript")) {
|
---|
| 409 | state = STATE_IN_NOSCRIPT
|
---|
| 410 | break
|
---|
| 411 | }
|
---|
| 412 | var attrs [][][]byte
|
---|
| 413 | if hasAttrs {
|
---|
| 414 | for {
|
---|
| 415 | attrName, attrValue, moreAttr := decoder.TagAttr()
|
---|
[21] | 416 | attrs = append(attrs, [][]byte{
|
---|
| 417 | attrName,
|
---|
| 418 | attrValue,
|
---|
| 419 | []byte(html.EscapeString(string(attrValue))),
|
---|
| 420 | })
|
---|
[1] | 421 | if !moreAttr {
|
---|
| 422 | break
|
---|
| 423 | }
|
---|
| 424 | }
|
---|
[13] | 425 | }
|
---|
| 426 | if bytes.Equal(tag, []byte("link")) {
|
---|
| 427 | sanitizeLinkTag(rc, out, attrs)
|
---|
| 428 | break
|
---|
| 429 | }
|
---|
| 430 |
|
---|
[45] | 431 | if bytes.Equal(tag, []byte("meta")) {
|
---|
| 432 | sanitizeMetaTag(rc, out, attrs)
|
---|
| 433 | break
|
---|
| 434 | }
|
---|
| 435 |
|
---|
[13] | 436 | fmt.Fprintf(out, "<%s", tag)
|
---|
| 437 |
|
---|
| 438 | if hasAttrs {
|
---|
[45] | 439 | sanitizeAttrs(rc, out, attrs)
|
---|
[1] | 440 | }
|
---|
[13] | 441 |
|
---|
[1] | 442 | if token == html.SelfClosingTagToken {
|
---|
[9] | 443 | fmt.Fprintf(out, " />")
|
---|
[1] | 444 | } else {
|
---|
[9] | 445 | fmt.Fprintf(out, ">")
|
---|
[1] | 446 | if bytes.Equal(tag, []byte("style")) {
|
---|
| 447 | state = STATE_IN_STYLE
|
---|
| 448 | }
|
---|
| 449 | }
|
---|
[13] | 450 |
|
---|
[45] | 451 | if bytes.Equal(tag, []byte("head")) {
|
---|
[46] | 452 | fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
|
---|
[45] | 453 | }
|
---|
| 454 |
|
---|
[1] | 455 | if bytes.Equal(tag, []byte("form")) {
|
---|
| 456 | var formURL *url.URL
|
---|
| 457 | for _, attr := range attrs {
|
---|
| 458 | if bytes.Equal(attr[0], []byte("action")) {
|
---|
| 459 | formURL, _ = url.Parse(string(attr[1]))
|
---|
[28] | 460 | formURL = mergeURIs(rc.BaseURL, formURL)
|
---|
[1] | 461 | break
|
---|
| 462 | }
|
---|
| 463 | }
|
---|
| 464 | if formURL == nil {
|
---|
[23] | 465 | formURL = rc.BaseURL
|
---|
[1] | 466 | }
|
---|
[2] | 467 | urlStr := formURL.String()
|
---|
| 468 | var key string
|
---|
| 469 | if rc.Key != nil {
|
---|
| 470 | key = hash(urlStr, rc.Key)
|
---|
| 471 | }
|
---|
[9] | 472 | fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
|
---|
[1] | 473 |
|
---|
| 474 | }
|
---|
| 475 |
|
---|
| 476 | case html.EndTagToken:
|
---|
| 477 | tag, _ := decoder.TagName()
|
---|
| 478 | writeEndTag := true
|
---|
| 479 | switch string(tag) {
|
---|
| 480 | case "body":
|
---|
[23] | 481 | fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
|
---|
[1] | 482 | case "style":
|
---|
| 483 | state = STATE_DEFAULT
|
---|
| 484 | case "noscript":
|
---|
| 485 | state = STATE_DEFAULT
|
---|
| 486 | writeEndTag = false
|
---|
| 487 | }
|
---|
| 488 | // skip noscript tags - only the tag, not the content, because javascript is sanitized
|
---|
| 489 | if writeEndTag {
|
---|
[9] | 490 | fmt.Fprintf(out, "</%s>", tag)
|
---|
[1] | 491 | }
|
---|
| 492 |
|
---|
| 493 | case html.TextToken:
|
---|
| 494 | switch state {
|
---|
| 495 | case STATE_DEFAULT:
|
---|
[9] | 496 | fmt.Fprintf(out, "%s", decoder.Raw())
|
---|
[1] | 497 | case STATE_IN_STYLE:
|
---|
[9] | 498 | sanitizeCSS(rc, out, decoder.Raw())
|
---|
[1] | 499 | case STATE_IN_NOSCRIPT:
|
---|
[9] | 500 | sanitizeHTML(rc, out, decoder.Raw())
|
---|
[1] | 501 | }
|
---|
| 502 |
|
---|
[60] | 503 | case html.DoctypeToken, html.CommentToken:
|
---|
[9] | 504 | out.Write(decoder.Raw())
|
---|
[1] | 505 | }
|
---|
| 506 | } else {
|
---|
| 507 | switch token {
|
---|
| 508 | case html.StartTagToken:
|
---|
| 509 | tag, _ := decoder.TagName()
|
---|
| 510 | if inArray(tag, UNSAFE_ELEMENTS) {
|
---|
| 511 | unsafeElements = append(unsafeElements, tag)
|
---|
| 512 | }
|
---|
| 513 |
|
---|
| 514 | case html.EndTagToken:
|
---|
| 515 | tag, _ := decoder.TagName()
|
---|
| 516 | if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
|
---|
| 517 | unsafeElements = unsafeElements[:len(unsafeElements)-1]
|
---|
| 518 | }
|
---|
| 519 | }
|
---|
| 520 | }
|
---|
| 521 | }
|
---|
| 522 | }
|
---|
| 523 |
|
---|
[13] | 524 | func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
| 525 | exclude := false
|
---|
| 526 | for _, attr := range attrs {
|
---|
| 527 | attrName := attr[0]
|
---|
| 528 | attrValue := attr[1]
|
---|
| 529 | if bytes.Equal(attrName, []byte("rel")) {
|
---|
[46] | 530 | if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
|
---|
[13] | 531 | exclude = true
|
---|
| 532 | break
|
---|
| 533 | }
|
---|
| 534 | }
|
---|
| 535 | if bytes.Equal(attrName, []byte("as")) {
|
---|
| 536 | if bytes.Equal(attrValue, []byte("script")) {
|
---|
| 537 | exclude = true
|
---|
| 538 | break
|
---|
| 539 | }
|
---|
| 540 | }
|
---|
| 541 | }
|
---|
| 542 |
|
---|
| 543 | if !exclude {
|
---|
| 544 | out.Write([]byte("<link"))
|
---|
| 545 | for _, attr := range attrs {
|
---|
[21] | 546 | sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
|
---|
[13] | 547 | }
|
---|
| 548 | out.Write([]byte(">"))
|
---|
| 549 | }
|
---|
| 550 | }
|
---|
| 551 |
|
---|
[45] | 552 | func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
[1] | 553 | var http_equiv []byte
|
---|
| 554 | var content []byte
|
---|
| 555 |
|
---|
| 556 | for _, attr := range attrs {
|
---|
| 557 | attrName := attr[0]
|
---|
| 558 | attrValue := attr[1]
|
---|
| 559 | if bytes.Equal(attrName, []byte("http-equiv")) {
|
---|
| 560 | http_equiv = bytes.ToLower(attrValue)
|
---|
[46] | 561 | // exclude some <meta http-equiv="..." ..>
|
---|
| 562 | if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
|
---|
| 563 | return
|
---|
| 564 | }
|
---|
[1] | 565 | }
|
---|
| 566 | if bytes.Equal(attrName, []byte("content")) {
|
---|
| 567 | content = attrValue
|
---|
| 568 | }
|
---|
[45] | 569 | if bytes.Equal(attrName, []byte("charset")) {
|
---|
| 570 | // exclude <meta charset="...">
|
---|
| 571 | return
|
---|
| 572 | }
|
---|
[1] | 573 | }
|
---|
| 574 |
|
---|
[45] | 575 | out.Write([]byte("<meta"))
|
---|
[14] | 576 | urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
|
---|
| 577 | if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
|
---|
| 578 | contentUrl := content[urlIndex+4:]
|
---|
[36] | 579 | // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
|
---|
[37] | 580 | if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
|
---|
[36] | 581 | if contentUrl[0] == contentUrl[len(contentUrl)-1] {
|
---|
[37] | 582 | contentUrl = contentUrl[1 : len(contentUrl)-1]
|
---|
[36] | 583 | }
|
---|
| 584 | }
|
---|
| 585 | // output proxify result
|
---|
[60] | 586 | if uri, err := rc.ProxifyURI(contentUrl); err == nil {
|
---|
[14] | 587 | fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
|
---|
[1] | 588 | }
|
---|
| 589 | } else {
|
---|
[46] | 590 | if len(http_equiv) > 0 {
|
---|
| 591 | fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
|
---|
| 592 | }
|
---|
[9] | 593 | sanitizeAttrs(rc, out, attrs)
|
---|
[1] | 594 | }
|
---|
[45] | 595 | out.Write([]byte(">"))
|
---|
[1] | 596 | }
|
---|
| 597 |
|
---|
[9] | 598 | func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
[1] | 599 | for _, attr := range attrs {
|
---|
[21] | 600 | sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
|
---|
[1] | 601 | }
|
---|
| 602 | }
|
---|
| 603 |
|
---|
[21] | 604 | func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
|
---|
[1] | 605 | if inArray(attrName, SAFE_ATTRIBUTES) {
|
---|
[21] | 606 | fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
|
---|
[1] | 607 | return
|
---|
| 608 | }
|
---|
| 609 | switch string(attrName) {
|
---|
| 610 | case "src", "href", "action":
|
---|
[60] | 611 | if uri, err := rc.ProxifyURI(attrValue); err == nil {
|
---|
[9] | 612 | fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
|
---|
[1] | 613 | } else {
|
---|
[36] | 614 | log.Println("cannot proxify uri:", string(attrValue))
|
---|
[1] | 615 | }
|
---|
| 616 | case "style":
|
---|
[21] | 617 | cssAttr := bytes.NewBuffer(nil)
|
---|
| 618 | sanitizeCSS(rc, cssAttr, attrValue)
|
---|
| 619 | fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
|
---|
[1] | 620 | }
|
---|
| 621 | }
|
---|
| 622 |
|
---|
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting absolute URL. When one of the two is nil the other one is
// returned unchanged instead of panicking.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 == nil {
		return u1
	}
	if u1 == nil {
		return u2
	}
	return u1.ResolveReference(u2)
}
| 626 |
|
---|
// sanitizeURI strips the bytes <= 32 (control characters and space) found at
// the beginning and at the end of uri and lower-cases the scheme.
//
// It returns the cleaned URI and the scheme including its trailing ':'
// ("" when no scheme was found). Bytes <= 32 located inside the scheme are
// removed as well. The result shares memory with uri, which is modified in
// place; the only allocation is for the scheme.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	start := 0    // index of the first byte > 32
	colonAt := -1 // index of the ':' ending the scheme, -1 if none
	started := false
	lowered := make([]byte, 0, 10) // lower-cased bytes > 32 seen so far

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// ignore special characters and space
			continue
		}
		if ch < utf8.RuneSelf && 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		lowered = append(lowered, ch)

		if !started {
			start, started = idx, true
		}

		switch ch {
		case ':':
			// end of the scheme
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonAt == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// Write the cleaned scheme right before the byte that follows the ':',
	// then return the URI from where the cleaned scheme begins.
	schemeStart := colonAt - len(lowered) + 1
	copy(uri[schemeStart:], lowered)
	return uri[schemeStart:], string(lowered)
}
| 678 |
|
---|
| 679 | func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
|
---|
| 680 | // sanitize URI
|
---|
| 681 | uri, scheme := sanitizeURI(uri)
|
---|
| 682 |
|
---|
[28] | 683 | // remove javascript protocol
|
---|
[60] | 684 | if scheme == "javascript:" {
|
---|
[28] | 685 | return "", nil
|
---|
| 686 | }
|
---|
[57] | 687 |
|
---|
[1] | 688 | // TODO check malicious data: - e.g. data:script
|
---|
[60] | 689 | if scheme == "data:" {
|
---|
| 690 | return string(uri), nil
|
---|
[1] | 691 | }
|
---|
| 692 |
|
---|
[57] | 693 | // parse the uri
|
---|
[60] | 694 | u, err := url.Parse(string(uri))
|
---|
[1] | 695 | if err != nil {
|
---|
| 696 | return "", err
|
---|
| 697 | }
|
---|
[57] | 698 |
|
---|
| 699 | // get the fragment (with the prefix "#")
|
---|
| 700 | fragment := ""
|
---|
| 701 | if len(u.Fragment) > 0 {
|
---|
| 702 | fragment = "#" + u.Fragment
|
---|
| 703 | }
|
---|
| 704 |
|
---|
| 705 | // reset the fragment: it is not included in the mortyurl
|
---|
| 706 | u.Fragment = ""
|
---|
| 707 |
|
---|
| 708 | // merge the URI with the document URI
|
---|
[28] | 709 | u = mergeURIs(rc.BaseURL, u)
|
---|
[1] | 710 |
|
---|
[57] | 711 | // simple internal link ?
|
---|
| 712 | // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
|
---|
| 713 | if u.Scheme == rc.BaseURL.Scheme &&
|
---|
| 714 | (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
|
---|
| 715 | u.Host == rc.BaseURL.Host &&
|
---|
| 716 | u.Path == rc.BaseURL.Path &&
|
---|
| 717 | u.RawQuery == rc.BaseURL.RawQuery {
|
---|
| 718 | // the fragment is the only difference between the document URI and the uri parameter
|
---|
| 719 | return fragment, nil
|
---|
| 720 | }
|
---|
| 721 |
|
---|
| 722 | // return full URI and fragment (if not empty)
|
---|
[60] | 723 | morty_uri := u.String()
|
---|
[1] | 724 |
|
---|
| 725 | if rc.Key == nil {
|
---|
[60] | 726 | return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
|
---|
[1] | 727 | }
|
---|
[60] | 728 | return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
|
---|
[1] | 729 | }
|
---|
| 730 |
|
---|
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
| 739 |
|
---|
// hash returns the hexadecimal HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))

	digest := signer.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
| 745 |
|
---|
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri computed with key. A hashMsg that is not valid hexadecimal is logged
// and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	expected := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(expected, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	return hmac.Equal(expected, signer.Sum(nil))
}
| 757 |
|
---|
[35] | 758 | func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
|
---|
[1] | 759 | ctx.SetContentType("text/html")
|
---|
[35] | 760 | ctx.SetStatusCode(statusCode)
|
---|
[1] | 761 | ctx.Write([]byte(`<!doctype html>
|
---|
| 762 | <head>
|
---|
[11] | 763 | <title>MortyProxy</title>
|
---|
[36] | 764 | <meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
|
---|
[11] | 765 | <style>
|
---|
[36] | 766 | html { height: 100%; }
|
---|
| 767 | body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
|
---|
[11] | 768 | input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
|
---|
[36] | 769 | input[placeholder] { width:80%; }
|
---|
[11] | 770 | a { text-decoration: none; #2980b9; }
|
---|
| 771 | h1, h2 { font-weight: 200; margin-bottom: 2rem; }
|
---|
| 772 | h1 { font-size: 3em; }
|
---|
[36] | 773 | .container { flex:1; min-height: 100%; margin-bottom: 1em; }
|
---|
| 774 | .footer { margin: 1em; }
|
---|
[11] | 775 | .footer p { font-size: 0.8em; }
|
---|
| 776 | </style>
|
---|
[1] | 777 | </head>
|
---|
[11] | 778 | <body>
|
---|
[36] | 779 | <div class="container">
|
---|
| 780 | <h1>MortyProxy</h1>
|
---|
| 781 | `))
|
---|
[11] | 782 | if err != nil {
|
---|
| 783 | log.Println("error:", err)
|
---|
| 784 | ctx.Write([]byte("<h2>Error: "))
|
---|
| 785 | ctx.Write([]byte(html.EscapeString(err.Error())))
|
---|
| 786 | ctx.Write([]byte("</h2>"))
|
---|
| 787 | }
|
---|
[1] | 788 | if p.Key == nil {
|
---|
| 789 | ctx.Write([]byte(`
|
---|
[36] | 790 | <form action="post">
|
---|
| 791 | Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
|
---|
| 792 | <input type="submit" value="go" />
|
---|
| 793 | </form>`))
|
---|
[11] | 794 | } else {
|
---|
| 795 | ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
|
---|
[1] | 796 | }
|
---|
| 797 | ctx.Write([]byte(`
|
---|
[36] | 798 | </div>
|
---|
| 799 | <div class="footer">
|
---|
| 800 | <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
|
---|
| 801 | <a href="https://github.com/asciimoo/morty">view on github</a>
|
---|
| 802 | </p>
|
---|
| 803 | </div>
|
---|
[1] | 804 | </body>
|
---|
| 805 | </html>`))
|
---|
| 806 | }
|
---|
| 807 |
|
---|
| 808 | func main() {
|
---|
| 809 |
|
---|
[2] | 810 | listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
|
---|
[1] | 811 | key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
|
---|
[24] | 812 | ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
|
---|
[4] | 813 | requestTimeout := flag.Uint("timeout", 2, "Request timeout")
|
---|
[1] | 814 | flag.Parse()
|
---|
| 815 |
|
---|
[24] | 816 | if *ipv6 {
|
---|
| 817 | CLIENT.Dial = fasthttp.DialDualStack
|
---|
| 818 | }
|
---|
| 819 |
|
---|
[4] | 820 | p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
|
---|
[1] | 821 |
|
---|
| 822 | if *key != "" {
|
---|
| 823 | p.Key = []byte(*key)
|
---|
| 824 | }
|
---|
| 825 |
|
---|
| 826 | log.Println("listening on", *listen)
|
---|
| 827 |
|
---|
| 828 | if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
|
---|
| 829 | log.Fatal("Error in ListenAndServe:", err)
|
---|
| 830 | }
|
---|
| 831 | }
|
---|