source: code/trunk/morty.go@ 62

Last change on this file since 62 was 62, checked in by asciimoo, 9 years ago

Merge pull request #38 from dalf/uri

[enh] ignore all special characters in the URI protocol

File size: 21.7 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "regexp"
15 "strings"
[4]16 "time"
[60]17 "unicode/utf8"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
[45]21 "golang.org/x/net/html/charset"
22 "golang.org/x/text/encoding"
[1]23)
24
// Parser states tracked by sanitizeHTML while walking the token stream.
const (
	// STATE_DEFAULT: text tokens are copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT
)
30
31var CLIENT *fasthttp.Client = &fasthttp.Client{
32 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
33}
34
// CSS_URL_REGEXP matches CSS `url(...)` references. Submatch 1 and 3 are the
// optional opening/closing quotes, submatch 2 is the referenced URI.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]36
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
46
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only HTML escaping applied to the value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
83
// SELF_CLOSING_ELEMENTS lists the tags that never have a matching end tag.
// sanitizeHTML consults it so that an unsafe tag from this list is not
// pushed on the stack of open unsafe elements.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
101
// LINK_REL_SAFE_VALUES lists the `rel` values for which sanitizeLinkTag keeps
// a <link> element; any other value causes the element to be dropped.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
123
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) `http-equiv` values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
132
// Proxy holds the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to validate "mortyhash"; nil disables validation.
	Key []byte
	// RequestTimeout bounds each upstream request.
	RequestTimeout time.Duration
}
137
// RequestConfig carries the per-document state needed to rewrite URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	// sanitizeHTML replaces it when the document contains <base href>.
	BaseURL *url.URL
}
142
// HTML_FORM_EXTENSION is the format string for the hidden inputs injected
// into every <form>: first verb is the target URL, second is its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]144
// HTML_BODY_EXTENSION is the format string for the banner injected right
// before </body>. Its single %s verb receives the original page URL; literal
// percent signs in the CSS are doubled.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; padding: 12px 12px 12px 0; margin: 0; box-sizing: content-box; top: 15%%; left: 0; max-width: 140px; color: #444; overflow: hidden; z-index: 110000; font-size: 12px; line-height: normal; }
#mortyheader a { color: #3498db; font-weight: bold; }
#mortyheader p { padding: 0 0 0.7em 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 12px !important; font-family: sans !important; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
160
// HTML_HEAD_CONTENT_TYPE is written right after the opening <head> tag by
// sanitizeHTML: it declares the UTF-8 charset and the X-UA-Compatible hint.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
`
[45]164
[1]165func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]166
167 if appRequestHandler(ctx) {
168 return
169 }
170
[1]171 requestHash := popRequestParam(ctx, []byte("mortyhash"))
172
173 requestURI := popRequestParam(ctx, []byte("mortyurl"))
174
175 if requestURI == nil {
[35]176 p.serveMainPage(ctx, 200, nil)
[1]177 return
178 }
179
180 if p.Key != nil {
181 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]182 // HTTP status code 403 : Forbidden
183 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]184 return
185 }
186 }
187
188 parsedURI, err := url.Parse(string(requestURI))
189
[18]190 if strings.HasSuffix(parsedURI.Host, ".onion") {
[35]191 // HTTP status code 501 : Not Implemented
192 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
[18]193 return
194 }
195
[11]196 if err != nil {
[35]197 // HTTP status code 500 : Internal Server Error
198 p.serveMainPage(ctx, 500, err)
[1]199 return
200 }
201
202 req := fasthttp.AcquireRequest()
203 defer fasthttp.ReleaseRequest(req)
[12]204 req.SetConnectionClose()
[1]205
[47]206 requestURIStr := string(requestURI)
[1]207
[47]208 log.Println("getting", requestURIStr)
[1]209
[47]210 req.SetRequestURI(requestURIStr)
[62]211 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
[1]212
213 resp := fasthttp.AcquireResponse()
214 defer fasthttp.ReleaseResponse(resp)
215
216 req.Header.SetMethodBytes(ctx.Method())
217 if ctx.IsPost() || ctx.IsPut() {
218 req.SetBody(ctx.PostBody())
219 }
220
[11]221 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
222
223 if err != nil {
[35]224 if err == fasthttp.ErrTimeout {
225 // HTTP status code 504 : Gateway Time-Out
226 p.serveMainPage(ctx, 504, err)
227 } else {
228 // HTTP status code 500 : Internal Server Error
229 p.serveMainPage(ctx, 500, err)
230 }
[1]231 return
232 }
233
234 if resp.StatusCode() != 200 {
235 switch resp.StatusCode() {
[7]236 case 301, 302, 303, 307, 308:
[1]237 loc := resp.Header.Peek("Location")
238 if loc != nil {
[23]239 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
[60]240 url, err := rc.ProxifyURI(loc)
[1]241 if err == nil {
242 ctx.SetStatusCode(resp.StatusCode())
243 ctx.Response.Header.Add("Location", url)
244 log.Println("redirect to", string(loc))
245 return
246 }
247 }
248 }
[47]249 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]250 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]251 return
252 }
253
254 contentType := resp.Header.Peek("Content-Type")
255
256 if contentType == nil {
[35]257 // HTTP status code 503 : Service Unavailable
258 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]259 return
260 }
261
[17]262 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
[35]263 // HTTP status code 403 : Forbidden
264 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
[17]265 return
266 }
267
[1]268 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
269
270 var responseBody []byte
271
[45]272 if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
273 e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
274 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
275 responseBody, err = e.NewDecoder().Bytes(resp.Body())
276 if err != nil {
277 // HTTP status code 503 : Service Unavailable
278 p.serveMainPage(ctx, 503, err)
279 return
280 }
281 } else {
282 responseBody = resp.Body()
[1]283 }
284 } else {
285 responseBody = resp.Body()
286 }
287
[52]288 if bytes.Contains(contentType, []byte("xhtml")) {
289 ctx.SetContentType("text/html; charset=UTF-8")
290 } else {
291 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
292 }
[1]293
294 switch {
295 case bytes.Contains(contentType, []byte("css")):
[23]296 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]297 case bytes.Contains(contentType, []byte("html")):
[23]298 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]299 default:
[39]300 if ctx.Request.Header.Peek("Content-Disposition") != nil {
301 ctx.Response.Header.AddBytesV("Content-Disposition", ctx.Request.Header.Peek("Content-Disposition"))
302 }
[1]303 ctx.Write(responseBody)
304 }
305}
306
[10]307func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]308 // serve robots.txt
[10]309 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
310 ctx.SetContentType("text/plain")
311 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
312 return true
313 }
[11]314
[10]315 return false
316}
317
[1]318func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
319 param := ctx.QueryArgs().PeekBytes(paramName)
320
321 if param == nil {
322 param = ctx.PostArgs().PeekBytes(paramName)
323 if param != nil {
324 ctx.PostArgs().DelBytes(paramName)
325 }
326 } else {
327 ctx.QueryArgs().DelBytes(paramName)
328 }
329
330 return param
331}
332
[9]333func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]334 // TODO
335
336 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
337
338 if urlSlices == nil {
[9]339 out.Write(css)
[1]340 return
341 }
342
343 startIndex := 0
344
345 for _, s := range urlSlices {
[15]346 urlStart := s[4]
347 urlEnd := s[5]
[1]348
[60]349 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]350 out.Write(css[startIndex:urlStart])
351 out.Write([]byte(uri))
[1]352 startIndex = urlEnd
353 } else {
[36]354 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]355 }
356 }
357 if startIndex < len(css) {
[9]358 out.Write(css[startIndex:len(css)])
[1]359 }
360}
361
[9]362func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]363 r := bytes.NewReader(htmlDoc)
364 decoder := html.NewTokenizer(r)
365 decoder.AllowCDATA(true)
366
367 unsafeElements := make([][]byte, 0, 8)
368 state := STATE_DEFAULT
369 for {
370 token := decoder.Next()
371 if token == html.ErrorToken {
372 err := decoder.Err()
373 if err != io.EOF {
374 log.Println("failed to parse HTML:")
375 }
376 break
377 }
378
379 if len(unsafeElements) == 0 {
380
381 switch token {
382 case html.StartTagToken, html.SelfClosingTagToken:
383 tag, hasAttrs := decoder.TagName()
384 safe := !inArray(tag, UNSAFE_ELEMENTS)
385 if !safe {
386 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
387 var unsafeTag []byte = make([]byte, len(tag))
388 copy(unsafeTag, tag)
389 unsafeElements = append(unsafeElements, unsafeTag)
390 }
391 break
392 }
[38]393 if bytes.Equal(tag, []byte("base")) {
394 for {
395 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]396 if bytes.Equal(attrName, []byte("href")) {
397 parsedURI, err := url.Parse(string(attrValue))
398 if err == nil {
399 rc.BaseURL = parsedURI
400 }
[38]401 }
402 if !moreAttr {
403 break
404 }
405 }
406 break
407 }
[1]408 if bytes.Equal(tag, []byte("noscript")) {
409 state = STATE_IN_NOSCRIPT
410 break
411 }
412 var attrs [][][]byte
413 if hasAttrs {
414 for {
415 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]416 attrs = append(attrs, [][]byte{
417 attrName,
418 attrValue,
419 []byte(html.EscapeString(string(attrValue))),
420 })
[1]421 if !moreAttr {
422 break
423 }
424 }
[13]425 }
426 if bytes.Equal(tag, []byte("link")) {
427 sanitizeLinkTag(rc, out, attrs)
428 break
429 }
430
[45]431 if bytes.Equal(tag, []byte("meta")) {
432 sanitizeMetaTag(rc, out, attrs)
433 break
434 }
435
[13]436 fmt.Fprintf(out, "<%s", tag)
437
438 if hasAttrs {
[45]439 sanitizeAttrs(rc, out, attrs)
[1]440 }
[13]441
[1]442 if token == html.SelfClosingTagToken {
[9]443 fmt.Fprintf(out, " />")
[1]444 } else {
[9]445 fmt.Fprintf(out, ">")
[1]446 if bytes.Equal(tag, []byte("style")) {
447 state = STATE_IN_STYLE
448 }
449 }
[13]450
[45]451 if bytes.Equal(tag, []byte("head")) {
[46]452 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]453 }
454
[1]455 if bytes.Equal(tag, []byte("form")) {
456 var formURL *url.URL
457 for _, attr := range attrs {
458 if bytes.Equal(attr[0], []byte("action")) {
459 formURL, _ = url.Parse(string(attr[1]))
[28]460 formURL = mergeURIs(rc.BaseURL, formURL)
[1]461 break
462 }
463 }
464 if formURL == nil {
[23]465 formURL = rc.BaseURL
[1]466 }
[2]467 urlStr := formURL.String()
468 var key string
469 if rc.Key != nil {
470 key = hash(urlStr, rc.Key)
471 }
[9]472 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]473
474 }
475
476 case html.EndTagToken:
477 tag, _ := decoder.TagName()
478 writeEndTag := true
479 switch string(tag) {
480 case "body":
[23]481 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]482 case "style":
483 state = STATE_DEFAULT
484 case "noscript":
485 state = STATE_DEFAULT
486 writeEndTag = false
487 }
488 // skip noscript tags - only the tag, not the content, because javascript is sanitized
489 if writeEndTag {
[9]490 fmt.Fprintf(out, "</%s>", tag)
[1]491 }
492
493 case html.TextToken:
494 switch state {
495 case STATE_DEFAULT:
[9]496 fmt.Fprintf(out, "%s", decoder.Raw())
[1]497 case STATE_IN_STYLE:
[9]498 sanitizeCSS(rc, out, decoder.Raw())
[1]499 case STATE_IN_NOSCRIPT:
[9]500 sanitizeHTML(rc, out, decoder.Raw())
[1]501 }
502
[62]503 case html.CommentToken:
504 // ignore comment. TODO : parse IE conditional comment
505
506 case html.DoctypeToken:
[9]507 out.Write(decoder.Raw())
[1]508 }
509 } else {
510 switch token {
511 case html.StartTagToken:
512 tag, _ := decoder.TagName()
513 if inArray(tag, UNSAFE_ELEMENTS) {
514 unsafeElements = append(unsafeElements, tag)
515 }
516
517 case html.EndTagToken:
518 tag, _ := decoder.TagName()
519 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
520 unsafeElements = unsafeElements[:len(unsafeElements)-1]
521 }
522 }
523 }
524 }
525}
526
[13]527func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
528 exclude := false
529 for _, attr := range attrs {
530 attrName := attr[0]
531 attrValue := attr[1]
532 if bytes.Equal(attrName, []byte("rel")) {
[46]533 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]534 exclude = true
535 break
536 }
537 }
538 if bytes.Equal(attrName, []byte("as")) {
539 if bytes.Equal(attrValue, []byte("script")) {
540 exclude = true
541 break
542 }
543 }
544 }
545
546 if !exclude {
547 out.Write([]byte("<link"))
548 for _, attr := range attrs {
[21]549 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]550 }
551 out.Write([]byte(">"))
552 }
553}
554
[45]555func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]556 var http_equiv []byte
557 var content []byte
558
559 for _, attr := range attrs {
560 attrName := attr[0]
561 attrValue := attr[1]
562 if bytes.Equal(attrName, []byte("http-equiv")) {
563 http_equiv = bytes.ToLower(attrValue)
[46]564 // exclude some <meta http-equiv="..." ..>
565 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
566 return
567 }
[1]568 }
569 if bytes.Equal(attrName, []byte("content")) {
570 content = attrValue
571 }
[45]572 if bytes.Equal(attrName, []byte("charset")) {
573 // exclude <meta charset="...">
574 return
575 }
[1]576 }
577
[45]578 out.Write([]byte("<meta"))
[14]579 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
580 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
581 contentUrl := content[urlIndex+4:]
[36]582 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]583 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]584 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]585 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]586 }
587 }
588 // output proxify result
[60]589 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]590 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]591 }
592 } else {
[46]593 if len(http_equiv) > 0 {
594 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
595 }
[9]596 sanitizeAttrs(rc, out, attrs)
[1]597 }
[45]598 out.Write([]byte(">"))
[1]599}
600
[9]601func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]602 for _, attr := range attrs {
[21]603 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]604 }
605}
606
[21]607func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]608 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]609 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]610 return
611 }
612 switch string(attrName) {
613 case "src", "href", "action":
[60]614 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]615 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]616 } else {
[36]617 log.Println("cannot proxify uri:", string(attrValue))
[1]618 }
619 case "style":
[21]620 cssAttr := bytes.NewBuffer(nil)
621 sanitizeCSS(rc, cssAttr, attrValue)
622 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]623 }
624}
625
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
629
// sanitizeURI strips bytes <= 32 (control characters and space) from the
// beginning and the end of uri and extracts a lower-cased scheme.
//
// The scheme is everything up to and including the first ':' with bytes <= 32
// removed and ASCII letters lower-cased; the scan gives up at the first
// '/', '?', '\' or '#'. When a scheme is found, the cleaned scheme is written
// back into uri in place and the returned slice starts at it; the second
// result is the scheme including the trailing ':'. Otherwise the second
// result is "" and the slice starts at the first byte > 32.
//
// Avoids memory allocation (except for the scheme).
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	start := 0     // index of the first byte > 32
	seen := false  // whether start has been set
	colonAt := -1  // index of the ':' ending the scheme, -1 if none
	cleaned := bytes.NewBuffer(make([]byte, 0, 10))

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		// ignore special characters and space
		if ch <= 32 {
			continue
		}

		// lower-case ASCII letters
		if ch < utf8.RuneSelf && 'A' <= ch && ch <= 'Z' {
			ch = ch + 'a' - 'A'
		}
		cleaned.WriteByte(ch)

		if !seen {
			start = idx
			seen = true
		}

		switch ch {
		case ':':
			// colon found: the scheme is complete
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonAt == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// Right-align the cleaned scheme against the ':' so that the returned
	// slice is "scheme:" immediately followed by the rest of the URI.
	schemeStart := colonAt - cleaned.Len() + 1
	copy(uri[schemeStart:], cleaned.Bytes())
	return uri[schemeStart:], cleaned.String()
}
681
682func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
683 // sanitize URI
684 uri, scheme := sanitizeURI(uri)
685
[28]686 // remove javascript protocol
[60]687 if scheme == "javascript:" {
[28]688 return "", nil
689 }
[57]690
[1]691 // TODO check malicious data: - e.g. data:script
[60]692 if scheme == "data:" {
[61]693 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
694 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
695 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
696 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
697 bytes.HasPrefix(uri, []byte("data:image/webp")) {
698 // should be safe
699 return string(uri), nil
700 } else {
701 // unsafe data
702 return "", nil
703 }
[1]704 }
705
[57]706 // parse the uri
[60]707 u, err := url.Parse(string(uri))
[1]708 if err != nil {
709 return "", err
710 }
[57]711
712 // get the fragment (with the prefix "#")
713 fragment := ""
714 if len(u.Fragment) > 0 {
715 fragment = "#" + u.Fragment
716 }
717
718 // reset the fragment: it is not included in the mortyurl
719 u.Fragment = ""
720
721 // merge the URI with the document URI
[28]722 u = mergeURIs(rc.BaseURL, u)
[1]723
[57]724 // simple internal link ?
725 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
726 if u.Scheme == rc.BaseURL.Scheme &&
727 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
728 u.Host == rc.BaseURL.Host &&
729 u.Path == rc.BaseURL.Path &&
730 u.RawQuery == rc.BaseURL.RawQuery {
731 // the fragment is the only difference between the document URI and the uri parameter
732 return fragment, nil
733 }
734
735 // return full URI and fragment (if not empty)
[60]736 morty_uri := u.String()
[1]737
738 if rc.Key == nil {
[60]739 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]740 }
[60]741 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]742}
743
// inArray reports whether a contains an element byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(b, a[i]) {
			continue
		}
		return true
	}
	return false
}
752
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
758
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	expected := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(expected, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	actual := signer.Sum(nil)

	// constant-time comparison
	return hmac.Equal(expected, actual)
}
770
[35]771func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[1]772 ctx.SetContentType("text/html")
[35]773 ctx.SetStatusCode(statusCode)
[1]774 ctx.Write([]byte(`<!doctype html>
775<head>
[11]776<title>MortyProxy</title>
[36]777<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
[11]778<style>
[36]779html { height: 100%; }
780body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
[11]781input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
[36]782input[placeholder] { width:80%; }
[11]783a { text-decoration: none; #2980b9; }
784h1, h2 { font-weight: 200; margin-bottom: 2rem; }
785h1 { font-size: 3em; }
[36]786.container { flex:1; min-height: 100%; margin-bottom: 1em; }
787.footer { margin: 1em; }
[11]788.footer p { font-size: 0.8em; }
789</style>
[1]790</head>
[11]791<body>
[36]792 <div class="container">
793 <h1>MortyProxy</h1>
794`))
[11]795 if err != nil {
796 log.Println("error:", err)
797 ctx.Write([]byte("<h2>Error: "))
798 ctx.Write([]byte(html.EscapeString(err.Error())))
799 ctx.Write([]byte("</h2>"))
800 }
[1]801 if p.Key == nil {
802 ctx.Write([]byte(`
[36]803 <form action="post">
804 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
805 <input type="submit" value="go" />
806 </form>`))
[11]807 } else {
808 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]809 }
810 ctx.Write([]byte(`
[36]811 </div>
812 <div class="footer">
813 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
814 <a href="https://github.com/asciimoo/morty">view on github</a>
815 </p>
816 </div>
[1]817</body>
818</html>`))
819}
820
821func main() {
822
[2]823 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]824 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[24]825 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[4]826 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]827 flag.Parse()
828
[24]829 if *ipv6 {
830 CLIENT.Dial = fasthttp.DialDualStack
831 }
832
[4]833 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]834
835 if *key != "" {
836 p.Key = []byte(*key)
837 }
838
839 log.Println("listening on", *listen)
840
841 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
842 log.Fatal("Error in ListenAndServe:", err)
843 }
844}
Note: See TracBrowser for help on using the repository browser.