source: code/trunk/morty.go@ 67

Last change on this file since 67 was 67, checked in by alex, 8 years ago

[mod] fix HTML in the welcome page. Make sure the morty header is always visible with the same CSS style. Add an empty favicon.ico.

File size: 22.5 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "net/url"
15 "regexp"
16 "strings"
[4]17 "time"
[60]18 "unicode/utf8"
[1]19
20 "github.com/valyala/fasthttp"
21 "golang.org/x/net/html"
[45]22 "golang.org/x/net/html/charset"
23 "golang.org/x/text/encoding"
[1]24)
25
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the content of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the content of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
31
32var CLIENT *fasthttp.Client = &fasthttp.Client{
33 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
34}
35
// CSS_URL_REGEXP matches CSS url(...) references.
//
// Capture groups: 1 = optional opening quote, 2 = the URL itself (tab and
// printable ASCII except space, double quote, single quote and the closing
// parenthesis), 3 = optional closing quote.
//
// The match is case-insensitive because CSS function names are: without the
// (?i) flag a stylesheet could write URL(...) and escape the rewrite.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile(`(?i)url\((['"]?)[ \t\f]*([\x09\x21\x23-\x26\x28\x2a-\x7E]+)(['"]?)\)?`)
[1]37
// UNSAFE_ELEMENTS lists the element names sanitizeHTML removes from the
// output, together with everything up to the matching end tag.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
47
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the output
// as-is (with the HTML-escaped value). Attributes outside this list are either
// rewritten (src, href, action, style) or dropped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
84
// SELF_CLOSING_ELEMENTS lists the elements that have no end tag. sanitizeHTML
// uses it to know that a removed unsafe element (e.g. embed) has no closing
// tag to wait for.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
102
// LINK_REL_SAFE_VALUES lists the rel values accepted by sanitizeLinkTag;
// a <link> element with any other rel value is removed.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
124
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values
// accepted by sanitizeMetaTag; a <meta> element with any other http-equiv
// value is removed.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
133
// Proxy holds the settings shared by every request served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to validate the "mortyhash" parameter.
	// A nil Key disables the validation.
	Key []byte
	// RequestTimeout bounds the request made to the target site.
	RequestTimeout time.Duration
}
138
// RequestConfig carries the per-document state needed to rewrite URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign the rewritten URLs (nil: no signature).
	Key []byte
	// BaseURL is the URL the document references are resolved against.
	BaseURL *url.URL
}
143
// HTML_FORM_EXTENSION is the format string of the hidden inputs appended to
// every <form>: the target URL ("mortyurl") and its hash ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]145
// HTML_BODY_EXTENSION is the format string of the morty header written just
// before </body>. Its single %s verb receives the URL of the original page;
// literal percent signs are doubled because the string goes through fmt.
//
// NOTE(review): the last CSS rule hides "div" siblings following the checkbox,
// but in this markup the checkbox siblings are <p> elements - confirm that the
// "hide" label behaves as intended.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
161
// HTML_HEAD_CONTENT_TYPE is written right after the opening <head> tag: it
// declares the UTF-8 charset, the IE compatibility mode and the no-referrer
// policy of the sanitized document.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]166
// FAVICON_BYTES is the PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The literal carries its trailing "=" padding: base64.StdEncoding rejects
// unpadded input, and with the error ignored the image silently lost its last
// two bytes (the end of the IEND chunk CRC).
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII="

	decoded, err := base64.StdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// the input is a compile-time constant: a failure is a programming error
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
174
[1]175func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]176
177 if appRequestHandler(ctx) {
178 return
179 }
180
[1]181 requestHash := popRequestParam(ctx, []byte("mortyhash"))
182
183 requestURI := popRequestParam(ctx, []byte("mortyurl"))
184
185 if requestURI == nil {
[35]186 p.serveMainPage(ctx, 200, nil)
[1]187 return
188 }
189
190 if p.Key != nil {
191 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]192 // HTTP status code 403 : Forbidden
193 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]194 return
195 }
196 }
197
198 parsedURI, err := url.Parse(string(requestURI))
199
[18]200 if strings.HasSuffix(parsedURI.Host, ".onion") {
[35]201 // HTTP status code 501 : Not Implemented
202 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
[18]203 return
204 }
205
[11]206 if err != nil {
[35]207 // HTTP status code 500 : Internal Server Error
208 p.serveMainPage(ctx, 500, err)
[1]209 return
210 }
211
212 req := fasthttp.AcquireRequest()
213 defer fasthttp.ReleaseRequest(req)
[12]214 req.SetConnectionClose()
[1]215
[47]216 requestURIStr := string(requestURI)
[1]217
[47]218 log.Println("getting", requestURIStr)
[1]219
[47]220 req.SetRequestURI(requestURIStr)
[62]221 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]222
223 resp := fasthttp.AcquireResponse()
224 defer fasthttp.ReleaseResponse(resp)
225
226 req.Header.SetMethodBytes(ctx.Method())
227 if ctx.IsPost() || ctx.IsPut() {
228 req.SetBody(ctx.PostBody())
229 }
230
[11]231 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
232
233 if err != nil {
[35]234 if err == fasthttp.ErrTimeout {
235 // HTTP status code 504 : Gateway Time-Out
236 p.serveMainPage(ctx, 504, err)
237 } else {
238 // HTTP status code 500 : Internal Server Error
239 p.serveMainPage(ctx, 500, err)
240 }
[1]241 return
242 }
243
244 if resp.StatusCode() != 200 {
245 switch resp.StatusCode() {
[7]246 case 301, 302, 303, 307, 308:
[1]247 loc := resp.Header.Peek("Location")
248 if loc != nil {
[23]249 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
[60]250 url, err := rc.ProxifyURI(loc)
[1]251 if err == nil {
252 ctx.SetStatusCode(resp.StatusCode())
253 ctx.Response.Header.Add("Location", url)
254 log.Println("redirect to", string(loc))
255 return
256 }
257 }
258 }
[47]259 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]260 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]261 return
262 }
263
[67]264 contentType := resp.Header.Peek("Content-Type")
[1]265
[67]266 if contentType == nil {
[35]267 // HTTP status code 503 : Service Unavailable
268 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]269 return
270 }
271
[67]272 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
273 // HTTP status code 403 : Forbidden
274 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
[63]275 return
276 }
277
[67]278 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
[64]279
[1]280 var responseBody []byte
281
[67]282 if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
283 e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
[45]284 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
285 responseBody, err = e.NewDecoder().Bytes(resp.Body())
286 if err != nil {
287 // HTTP status code 503 : Service Unavailable
288 p.serveMainPage(ctx, 503, err)
289 return
290 }
291 } else {
292 responseBody = resp.Body()
[1]293 }
294 } else {
295 responseBody = resp.Body()
296 }
297
[67]298 if bytes.Contains(contentType, []byte("xhtml")) {
299 ctx.SetContentType("text/html; charset=UTF-8")
300 } else {
301 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
302 }
[1]303
304 switch {
[67]305 case bytes.Contains(contentType, []byte("css")):
[23]306 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[67]307 case bytes.Contains(contentType, []byte("html")):
[23]308 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]309 default:
[67]310 if ctx.Request.Header.Peek("Content-Disposition") != nil {
311 ctx.Response.Header.AddBytesV("Content-Disposition", ctx.Request.Header.Peek("Content-Disposition"))
[39]312 }
[1]313 ctx.Write(responseBody)
314 }
315}
316
[10]317func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]318 // serve robots.txt
[10]319 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
320 ctx.SetContentType("text/plain")
321 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
322 return true
323 }
[11]324
[67]325 // server favicon.ico
326 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
327 ctx.SetContentType("image/png")
328 ctx.Write(FAVICON_BYTES)
329 return true
330 }
331
[10]332 return false
333}
334
[1]335func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
336 param := ctx.QueryArgs().PeekBytes(paramName)
337
338 if param == nil {
339 param = ctx.PostArgs().PeekBytes(paramName)
340 if param != nil {
341 ctx.PostArgs().DelBytes(paramName)
342 }
343 } else {
344 ctx.QueryArgs().DelBytes(paramName)
345 }
346
347 return param
348}
349
[9]350func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]351 // TODO
352
353 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
354
355 if urlSlices == nil {
[9]356 out.Write(css)
[1]357 return
358 }
359
360 startIndex := 0
361
362 for _, s := range urlSlices {
[15]363 urlStart := s[4]
364 urlEnd := s[5]
[1]365
[60]366 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]367 out.Write(css[startIndex:urlStart])
368 out.Write([]byte(uri))
[1]369 startIndex = urlEnd
370 } else {
[36]371 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]372 }
373 }
374 if startIndex < len(css) {
[9]375 out.Write(css[startIndex:len(css)])
[1]376 }
377}
378
[9]379func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]380 r := bytes.NewReader(htmlDoc)
381 decoder := html.NewTokenizer(r)
382 decoder.AllowCDATA(true)
383
384 unsafeElements := make([][]byte, 0, 8)
385 state := STATE_DEFAULT
386 for {
387 token := decoder.Next()
388 if token == html.ErrorToken {
389 err := decoder.Err()
390 if err != io.EOF {
391 log.Println("failed to parse HTML:")
392 }
393 break
394 }
395
396 if len(unsafeElements) == 0 {
397
398 switch token {
399 case html.StartTagToken, html.SelfClosingTagToken:
400 tag, hasAttrs := decoder.TagName()
401 safe := !inArray(tag, UNSAFE_ELEMENTS)
402 if !safe {
403 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
404 var unsafeTag []byte = make([]byte, len(tag))
405 copy(unsafeTag, tag)
406 unsafeElements = append(unsafeElements, unsafeTag)
407 }
408 break
409 }
[38]410 if bytes.Equal(tag, []byte("base")) {
411 for {
412 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]413 if bytes.Equal(attrName, []byte("href")) {
414 parsedURI, err := url.Parse(string(attrValue))
415 if err == nil {
416 rc.BaseURL = parsedURI
417 }
[38]418 }
419 if !moreAttr {
420 break
421 }
422 }
423 break
424 }
[1]425 if bytes.Equal(tag, []byte("noscript")) {
426 state = STATE_IN_NOSCRIPT
427 break
428 }
429 var attrs [][][]byte
430 if hasAttrs {
431 for {
432 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]433 attrs = append(attrs, [][]byte{
434 attrName,
435 attrValue,
436 []byte(html.EscapeString(string(attrValue))),
437 })
[1]438 if !moreAttr {
439 break
440 }
441 }
[13]442 }
443 if bytes.Equal(tag, []byte("link")) {
444 sanitizeLinkTag(rc, out, attrs)
445 break
446 }
447
[45]448 if bytes.Equal(tag, []byte("meta")) {
449 sanitizeMetaTag(rc, out, attrs)
450 break
451 }
452
[13]453 fmt.Fprintf(out, "<%s", tag)
454
455 if hasAttrs {
[45]456 sanitizeAttrs(rc, out, attrs)
[1]457 }
[13]458
[1]459 if token == html.SelfClosingTagToken {
[9]460 fmt.Fprintf(out, " />")
[1]461 } else {
[9]462 fmt.Fprintf(out, ">")
[1]463 if bytes.Equal(tag, []byte("style")) {
464 state = STATE_IN_STYLE
465 }
466 }
[13]467
[45]468 if bytes.Equal(tag, []byte("head")) {
[46]469 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]470 }
471
[1]472 if bytes.Equal(tag, []byte("form")) {
473 var formURL *url.URL
474 for _, attr := range attrs {
475 if bytes.Equal(attr[0], []byte("action")) {
476 formURL, _ = url.Parse(string(attr[1]))
[28]477 formURL = mergeURIs(rc.BaseURL, formURL)
[1]478 break
479 }
480 }
481 if formURL == nil {
[23]482 formURL = rc.BaseURL
[1]483 }
[2]484 urlStr := formURL.String()
485 var key string
486 if rc.Key != nil {
487 key = hash(urlStr, rc.Key)
488 }
[9]489 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]490
491 }
492
493 case html.EndTagToken:
494 tag, _ := decoder.TagName()
495 writeEndTag := true
496 switch string(tag) {
497 case "body":
[23]498 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]499 case "style":
500 state = STATE_DEFAULT
501 case "noscript":
502 state = STATE_DEFAULT
503 writeEndTag = false
504 }
505 // skip noscript tags - only the tag, not the content, because javascript is sanitized
506 if writeEndTag {
[9]507 fmt.Fprintf(out, "</%s>", tag)
[1]508 }
509
510 case html.TextToken:
511 switch state {
512 case STATE_DEFAULT:
[9]513 fmt.Fprintf(out, "%s", decoder.Raw())
[1]514 case STATE_IN_STYLE:
[9]515 sanitizeCSS(rc, out, decoder.Raw())
[1]516 case STATE_IN_NOSCRIPT:
[9]517 sanitizeHTML(rc, out, decoder.Raw())
[1]518 }
519
[62]520 case html.CommentToken:
521 // ignore comment. TODO : parse IE conditional comment
522
523 case html.DoctypeToken:
[9]524 out.Write(decoder.Raw())
[1]525 }
526 } else {
527 switch token {
528 case html.StartTagToken:
529 tag, _ := decoder.TagName()
530 if inArray(tag, UNSAFE_ELEMENTS) {
531 unsafeElements = append(unsafeElements, tag)
532 }
533
534 case html.EndTagToken:
535 tag, _ := decoder.TagName()
536 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
537 unsafeElements = unsafeElements[:len(unsafeElements)-1]
538 }
539 }
540 }
541 }
542}
543
[13]544func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
545 exclude := false
546 for _, attr := range attrs {
547 attrName := attr[0]
548 attrValue := attr[1]
549 if bytes.Equal(attrName, []byte("rel")) {
[46]550 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]551 exclude = true
552 break
553 }
554 }
555 if bytes.Equal(attrName, []byte("as")) {
556 if bytes.Equal(attrValue, []byte("script")) {
557 exclude = true
558 break
559 }
560 }
561 }
562
563 if !exclude {
564 out.Write([]byte("<link"))
565 for _, attr := range attrs {
[21]566 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]567 }
568 out.Write([]byte(">"))
569 }
570}
571
[45]572func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]573 var http_equiv []byte
574 var content []byte
575
576 for _, attr := range attrs {
577 attrName := attr[0]
578 attrValue := attr[1]
579 if bytes.Equal(attrName, []byte("http-equiv")) {
580 http_equiv = bytes.ToLower(attrValue)
[46]581 // exclude some <meta http-equiv="..." ..>
582 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
583 return
584 }
[1]585 }
586 if bytes.Equal(attrName, []byte("content")) {
587 content = attrValue
588 }
[45]589 if bytes.Equal(attrName, []byte("charset")) {
590 // exclude <meta charset="...">
591 return
592 }
[1]593 }
594
[45]595 out.Write([]byte("<meta"))
[14]596 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
597 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
598 contentUrl := content[urlIndex+4:]
[36]599 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]600 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]601 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]602 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]603 }
604 }
605 // output proxify result
[60]606 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]607 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]608 }
609 } else {
[46]610 if len(http_equiv) > 0 {
611 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
612 }
[9]613 sanitizeAttrs(rc, out, attrs)
[1]614 }
[45]615 out.Write([]byte(">"))
[1]616}
617
[9]618func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]619 for _, attr := range attrs {
[21]620 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]621 }
622}
623
[21]624func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]625 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]626 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]627 return
628 }
629 switch string(attrName) {
630 case "src", "href", "action":
[60]631 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]632 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]633 } else {
[36]634 log.Println("cannot proxify uri:", string(attrValue))
[1]635 }
636 case "style":
[21]637 cssAttr := bytes.NewBuffer(nil)
638 sanitizeCSS(rc, cssAttr, attrValue)
639 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]640 }
641}
642
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
646
// sanitizeURI cleans up uri and extracts its scheme.
//
// Bytes up to 32 (space included) are removed from the end of the URI, from
// its beginning and from inside the scheme; the scheme is lower-cased.
// The result is the cleaned URI and the scheme including the trailing ":"
// ("" when the URI has no scheme).
//
// To avoid memory allocation (except for the scheme) the cleaned scheme is
// written back into uri: the content of the parameter is modified.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	// cleaned scheme candidate: the kept bytes, lower-cased
	candidate := make([]byte, 0, 10)
	// index of the first kept byte
	firstKept := 0

	for pos := 0; pos < len(uri); pos++ {
		ch := uri[pos]
		if ch <= 32 {
			// special character or space: skipped
			continue
		}
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		if len(candidate) == 0 {
			firstKept = pos
		}
		candidate = append(candidate, ch)

		switch ch {
		case ':':
			// end of the scheme: write the cleaned scheme right before the
			// rest of the URI, so that it ends on the colon
			schemeStart := pos - len(candidate) + 1
			copy(uri[schemeStart:], candidate)
			return uri[schemeStart:], string(candidate)
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			return uri[firstKept:], ""
		}
	}

	// the end of the URI has been reached without finding a scheme
	return uri[firstKept:], ""
}
698
699func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
700 // sanitize URI
701 uri, scheme := sanitizeURI(uri)
702
[28]703 // remove javascript protocol
[60]704 if scheme == "javascript:" {
[28]705 return "", nil
706 }
[57]707
[1]708 // TODO check malicious data: - e.g. data:script
[60]709 if scheme == "data:" {
[61]710 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
711 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
712 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
713 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
714 bytes.HasPrefix(uri, []byte("data:image/webp")) {
715 // should be safe
716 return string(uri), nil
717 } else {
718 // unsafe data
719 return "", nil
720 }
[1]721 }
722
[57]723 // parse the uri
[60]724 u, err := url.Parse(string(uri))
[1]725 if err != nil {
726 return "", err
727 }
[57]728
729 // get the fragment (with the prefix "#")
730 fragment := ""
731 if len(u.Fragment) > 0 {
732 fragment = "#" + u.Fragment
733 }
734
735 // reset the fragment: it is not included in the mortyurl
736 u.Fragment = ""
737
738 // merge the URI with the document URI
[28]739 u = mergeURIs(rc.BaseURL, u)
[1]740
[57]741 // simple internal link ?
742 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
743 if u.Scheme == rc.BaseURL.Scheme &&
744 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
745 u.Host == rc.BaseURL.Host &&
746 u.Path == rc.BaseURL.Path &&
747 u.RawQuery == rc.BaseURL.RawQuery {
748 // the fragment is the only difference between the document URI and the uri parameter
749 return fragment, nil
750 }
751
752 // return full URI and fragment (if not empty)
[60]753 morty_uri := u.String()
[1]754
755 if rc.Key == nil {
[60]756 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]757 }
[60]758 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]759}
760
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
769
// hash returns the hexadecimal encoding of the HMAC-SHA256 of msg computed
// with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
775
// verifyRequestURI reports whether hashMsg is the hexadecimal encoded
// HMAC-SHA256 of uri computed with key. A hash that is not valid hexadecimal
// is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	// constant time comparison
	return hmac.Equal(received, expected)
}
787
[35]788func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]789 ctx.SetContentType("text/html; charset=UTF-8")
[35]790 ctx.SetStatusCode(statusCode)
[1]791 ctx.Write([]byte(`<!doctype html>
[67]792<html>
[1]793<head>
[11]794<title>MortyProxy</title>
[36]795<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
[11]796<style>
[36]797html { height: 100%; }
798body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
[11]799input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
[36]800input[placeholder] { width:80%; }
[11]801a { text-decoration: none; #2980b9; }
802h1, h2 { font-weight: 200; margin-bottom: 2rem; }
803h1 { font-size: 3em; }
[36]804.container { flex:1; min-height: 100%; margin-bottom: 1em; }
805.footer { margin: 1em; }
[11]806.footer p { font-size: 0.8em; }
807</style>
[1]808</head>
[11]809<body>
[36]810 <div class="container">
811 <h1>MortyProxy</h1>
812`))
[11]813 if err != nil {
814 log.Println("error:", err)
815 ctx.Write([]byte("<h2>Error: "))
816 ctx.Write([]byte(html.EscapeString(err.Error())))
817 ctx.Write([]byte("</h2>"))
818 }
[1]819 if p.Key == nil {
820 ctx.Write([]byte(`
[36]821 <form action="post">
822 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
823 <input type="submit" value="go" />
824 </form>`))
[11]825 } else {
826 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]827 }
828 ctx.Write([]byte(`
[36]829 </div>
830 <div class="footer">
831 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
832 <a href="https://github.com/asciimoo/morty">view on github</a>
833 </p>
834 </div>
[1]835</body>
836</html>`))
837}
838
839func main() {
840
[2]841 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]842 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[24]843 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[4]844 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]845 flag.Parse()
846
[24]847 if *ipv6 {
848 CLIENT.Dial = fasthttp.DialDualStack
849 }
850
[4]851 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]852
853 if *key != "" {
854 p.Key = []byte(*key)
855 }
856
857 log.Println("listening on", *listen)
858
859 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
860 log.Fatal("Error in ListenAndServe:", err)
861 }
862}
Note: See TracBrowser for help on using the repository browser.