source: code/trunk/morty.go@ 9

Last change on this file since 9 was 9, checked in by asciimoo, 9 years ago

[mod] use io.Writer in sanitizers

File size: 13.4 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
[4]17 "time"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and is passed
	// through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is
	// sanitized recursively as HTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP finds URL references in CSS: either `url(` or `@import `
// (group 1), an optional quote (group 2), the URL itself (group 3), an
// optional quote (group 4) and an optional closing parenthesis.
// sanitizeCSS only rewrites group 3.
var CSS_URL_REGEXP = regexp.MustCompile("(url\\(|@import +)(['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]35
// UNSAFE_ELEMENTS lists the element names that sanitizeHTML removes
// together with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their value. Any other attribute is either rewritten
// (src, href, action, style) or dropped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
78
// SELF_CLOSING_ELEMENTS lists elements that never have a closing tag.
// sanitizeHTML consults it so that an unsafe element from this list is
// skipped without waiting for an end tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
96
// Proxy holds the settings shared by all requests served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to validate "mortyhash"; when nil, requests
	// are accepted without hash validation.
	Key []byte
	// RequestTimeout is the time limit passed to the upstream client for
	// each fetch.
	RequestTimeout time.Duration
}
101
// RequestConfig carries the per-request data the sanitizers need to rewrite
// links. Callers build it with a positional literal, so the field order
// (Key, baseURL) must not change.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// baseURL is the URL of the document being sanitized; relative links
	// are resolved against it.
	baseURL *url.URL
}
106
// HTML_FORM_EXTENSION is the format string for the two hidden inputs written
// right after every <form> start tag: the first %s is the form's target URL
// ("mortyurl"), the second its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]108
// HTML_BODY_EXTENSION is the format string for the banner written just
// before </body>. Its single %s is the URL of the original page; literal
// percent signs in the CSS are doubled because the text goes through
// fmt.Fprintf.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
126 requestHash := popRequestParam(ctx, []byte("mortyhash"))
127
128 requestURI := popRequestParam(ctx, []byte("mortyurl"))
129
130 if requestURI == nil {
131 p.breakOnError(ctx, errors.New(`missing "mortyurl" URL parameter`))
132 return
133 }
134
135 if p.Key != nil {
136 if !verifyRequestURI(requestURI, requestHash, p.Key) {
137 p.breakOnError(ctx, errors.New("invalid hash"))
138 return
139 }
140 }
141
142 parsedURI, err := url.Parse(string(requestURI))
143
144 if p.breakOnError(ctx, err) {
145 return
146 }
147
148 req := fasthttp.AcquireRequest()
149 defer fasthttp.ReleaseRequest(req)
150
151 reqQuery := parsedURI.Query()
152 ctx.QueryArgs().VisitAll(func(key, value []byte) {
153 reqQuery.Add(string(key), string(value))
154 })
155
156 parsedURI.RawQuery = reqQuery.Encode()
157
158 uriStr := parsedURI.String()
159
160 log.Println("getting", uriStr)
161
162 req.SetRequestURI(uriStr)
163 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
164
165 resp := fasthttp.AcquireResponse()
166 defer fasthttp.ReleaseResponse(resp)
167
168 req.Header.SetMethodBytes(ctx.Method())
169 if ctx.IsPost() || ctx.IsPut() {
170 req.SetBody(ctx.PostBody())
171 }
172
[4]173 if p.breakOnError(ctx, CLIENT.DoTimeout(req, resp, p.RequestTimeout)) {
[1]174 return
175 }
176
177 if resp.StatusCode() != 200 {
178 switch resp.StatusCode() {
[7]179 case 301, 302, 303, 307, 308:
[1]180 loc := resp.Header.Peek("Location")
181 if loc != nil {
182 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
183 if err == nil {
184 ctx.SetStatusCode(resp.StatusCode())
185 ctx.Response.Header.Add("Location", url)
186 log.Println("redirect to", string(loc))
187 return
188 }
189 }
190 }
191 log.Println("invalid request:", resp.StatusCode())
192 return
193 }
194
195 contentType := resp.Header.Peek("Content-Type")
196
197 if contentType == nil {
198 p.breakOnError(ctx, errors.New("invalid content type"))
199 return
200 }
201
202 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
203
204 var responseBody []byte
205
206 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
207 var err error
208 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
209 if p.breakOnError(ctx, err) {
210 return
211 }
212 } else {
213 responseBody = resp.Body()
214 }
215
216 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
217
218 switch {
219 case bytes.Contains(contentType, []byte("css")):
220 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
221 case bytes.Contains(contentType, []byte("html")):
222 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
223 default:
224 ctx.Write(responseBody)
225 }
226}
227
228func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
229 param := ctx.QueryArgs().PeekBytes(paramName)
230
231 if param == nil {
232 param = ctx.PostArgs().PeekBytes(paramName)
233 if param != nil {
234 ctx.PostArgs().DelBytes(paramName)
235 }
236 } else {
237 ctx.QueryArgs().DelBytes(paramName)
238 }
239
240 return param
241}
242
[9]243func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]244 // TODO
245
246 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
247
248 if urlSlices == nil {
[9]249 out.Write(css)
[1]250 return
251 }
252
253 startIndex := 0
254
255 for _, s := range urlSlices {
[8]256 urlStart := s[6]
257 urlEnd := s[7]
[1]258
259 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
[9]260 out.Write(css[startIndex:urlStart])
261 out.Write([]byte(uri))
[1]262 startIndex = urlEnd
263 } else {
264 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
265 }
266 }
267 if startIndex < len(css) {
[9]268 out.Write(css[startIndex:len(css)])
[1]269 }
270}
271
[9]272func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]273 r := bytes.NewReader(htmlDoc)
274 decoder := html.NewTokenizer(r)
275 decoder.AllowCDATA(true)
276
277 unsafeElements := make([][]byte, 0, 8)
278 state := STATE_DEFAULT
279
280 for {
281 token := decoder.Next()
282 if token == html.ErrorToken {
283 err := decoder.Err()
284 if err != io.EOF {
285 log.Println("failed to parse HTML:")
286 }
287 break
288 }
289
290 if len(unsafeElements) == 0 {
291
292 switch token {
293 case html.StartTagToken, html.SelfClosingTagToken:
294 tag, hasAttrs := decoder.TagName()
295 safe := !inArray(tag, UNSAFE_ELEMENTS)
296 if !safe {
297 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
298 var unsafeTag []byte = make([]byte, len(tag))
299 copy(unsafeTag, tag)
300 unsafeElements = append(unsafeElements, unsafeTag)
301 }
302 break
303 }
304 if bytes.Equal(tag, []byte("noscript")) {
305 state = STATE_IN_NOSCRIPT
306 break
307 }
308 var attrs [][][]byte
[9]309 fmt.Fprintf(out, "<%s", tag)
[1]310 if hasAttrs {
311 for {
312 attrName, attrValue, moreAttr := decoder.TagAttr()
313 attrs = append(attrs, [][]byte{attrName, attrValue})
314 if !moreAttr {
315 break
316 }
317 }
318 if bytes.Equal(tag, []byte("meta")) {
[9]319 sanitizeMetaAttrs(rc, out, attrs)
[1]320 } else {
[9]321 sanitizeAttrs(rc, out, attrs)
[1]322 }
323 }
324 if token == html.SelfClosingTagToken {
[9]325 fmt.Fprintf(out, " />")
[1]326 } else {
[9]327 fmt.Fprintf(out, ">")
[1]328 if bytes.Equal(tag, []byte("style")) {
329 state = STATE_IN_STYLE
330 }
331 }
332 if bytes.Equal(tag, []byte("form")) {
333 var formURL *url.URL
334 for _, attr := range attrs {
335 if bytes.Equal(attr[0], []byte("action")) {
336 formURL, _ = url.Parse(string(attr[1]))
337 mergeURIs(rc.baseURL, formURL)
338 break
339 }
340 }
341 if formURL == nil {
342 formURL = rc.baseURL
343 }
[2]344 urlStr := formURL.String()
345 var key string
346 if rc.Key != nil {
347 key = hash(urlStr, rc.Key)
348 }
[9]349 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]350
351 }
352
353 case html.EndTagToken:
354 tag, _ := decoder.TagName()
355 writeEndTag := true
356 switch string(tag) {
357 case "body":
[9]358 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
[1]359 case "style":
360 state = STATE_DEFAULT
361 case "noscript":
362 state = STATE_DEFAULT
363 writeEndTag = false
364 }
365 // skip noscript tags - only the tag, not the content, because javascript is sanitized
366 if writeEndTag {
[9]367 fmt.Fprintf(out, "</%s>", tag)
[1]368 }
369
370 case html.TextToken:
371 switch state {
372 case STATE_DEFAULT:
[9]373 fmt.Fprintf(out, "%s", decoder.Raw())
[1]374 case STATE_IN_STYLE:
[9]375 sanitizeCSS(rc, out, decoder.Raw())
[1]376 case STATE_IN_NOSCRIPT:
[9]377 sanitizeHTML(rc, out, decoder.Raw())
[1]378 }
379
380 case html.DoctypeToken, html.CommentToken:
[9]381 out.Write(decoder.Raw())
[1]382 }
383 } else {
384 switch token {
385 case html.StartTagToken:
386 tag, _ := decoder.TagName()
387 if inArray(tag, UNSAFE_ELEMENTS) {
388 unsafeElements = append(unsafeElements, tag)
389 }
390
391 case html.EndTagToken:
392 tag, _ := decoder.TagName()
393 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
394 unsafeElements = unsafeElements[:len(unsafeElements)-1]
395 }
396 }
397 }
398 }
399}
400
[9]401func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]402 var http_equiv []byte
403 var content []byte
404
405 for _, attr := range attrs {
406 attrName := attr[0]
407 attrValue := attr[1]
408 if bytes.Equal(attrName, []byte("http-equiv")) {
409 http_equiv = bytes.ToLower(attrValue)
410 }
411 if bytes.Equal(attrName, []byte("content")) {
412 content = attrValue
413 }
414 }
415
416 if bytes.Equal(http_equiv, []byte("refresh")) && bytes.Index(content, []byte(";url=")) != -1 {
417 parts := bytes.SplitN(content, []byte(";url="), 2)
418 if uri, err := proxifyURI(rc, string(parts[1])); err == nil {
[9]419 fmt.Fprintf(out, ` http-equiv="refresh" content="%s;%s"`, parts[0], uri)
[1]420 }
421 } else {
[9]422 sanitizeAttrs(rc, out, attrs)
[1]423 }
424
425}
426
[9]427func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]428 for _, attr := range attrs {
[9]429 sanitizeAttr(rc, out, attr[0], attr[1])
[1]430 }
431}
432
[9]433func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
[1]434 if inArray(attrName, SAFE_ATTRIBUTES) {
[9]435 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
[1]436 return
437 }
438 switch string(attrName) {
439 case "src", "href", "action":
440 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
[9]441 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]442 } else {
443 log.Println("cannot proxify uri:", attrValue)
444 }
445 case "style":
[9]446 fmt.Fprintf(out, " %s=\"", attrName)
447 sanitizeCSS(rc, out, attrValue)
448 out.Write([]byte("\""))
[1]449 }
450}
451
// mergeURIs resolves the reference u2 against the base URL u1, modifying u2
// in place.
//
//   - A reference that has its own scheme is left untouched.
//   - A scheme-relative reference ("//host/path") only receives u1's scheme.
//   - Otherwise u2 also receives u1's host, and its path is resolved:
//     an empty path becomes u1's path (and u1's query when u2 has none),
//     an absolute path is kept, and a relative path is joined onto the
//     directory of u1's path with "." and ".." segments cleaned and a
//     trailing slash preserved.
//
// NOTE(review): url.URL.ResolveReference implements the full RFC 3986
// algorithm and could replace this function if the file's imports are
// revisited.
func mergeURIs(u1, u2 *url.URL) {
	if u2.Scheme != "" {
		return
	}
	u2.Scheme = u1.Scheme

	if u2.Host != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		u2.Path = u1.Path
		if u2.RawQuery == "" {
			u2.RawQuery = u1.RawQuery
		}
	case u2.Path[0] != '/':
		// Everything up to and including the last slash of the base path;
		// empty when the base path contains no slash.
		baseDir := u1.Path[:strings.LastIndexByte(u1.Path, '/')+1]
		merged := path.Join("/", baseDir, u2.Path)
		// path.Join strips a trailing slash, which is significant in URLs.
		if strings.HasSuffix(u2.Path, "/") && merged != "/" {
			merged += "/"
		}
		u2.Path = merged
	}
}
463
464func proxifyURI(rc *RequestConfig, uri string) (string, error) {
465 // TODO check malicious data: - e.g. data:script
466 if strings.HasPrefix(uri, "data:") {
467 return uri, nil
468 }
469
470 if len(uri) > 0 && uri[0] == '#' {
471 return uri, nil
472 }
473
474 u, err := url.Parse(uri)
475 if err != nil {
476 return "", err
477 }
478 mergeURIs(rc.baseURL, u)
479
480 uri = u.String()
481
482 if rc.Key == nil {
483 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
484 }
485 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
486}
487
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
496
// hash returns the HMAC-SHA256 of msg under key as a lowercase hexadecimal
// string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
502
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	claimed, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(claimed, expected)
}
514
515func (p *Proxy) breakOnError(ctx *fasthttp.RequestCtx, err error) bool {
516 if err == nil {
517 return false
518 }
[4]519 log.Println("error:", err)
[1]520 ctx.SetStatusCode(404)
521 ctx.SetContentType("text/html")
522 ctx.Write([]byte(`<!doctype html>
523<head>
524<title>MortyError</title>
525</head>
526<body><h2>Error!</h2>`))
527 ctx.Write([]byte("<h3>"))
528 ctx.Write([]byte(html.EscapeString(err.Error())))
529 ctx.Write([]byte("</h3>"))
530 if p.Key == nil {
531 ctx.Write([]byte(`
532<form action="post">
533 Visit url: <input placeholder="https://url.." name="mortyurl" />
534 <input type="submit" value="go" />
535</form>`))
536 }
537 ctx.Write([]byte(`
538</body>
539</html>`))
540 return true
541}
542
543func main() {
544
[2]545 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]546 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[4]547 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]548 flag.Parse()
549
[4]550 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]551
552 if *key != "" {
553 p.Key = []byte(*key)
554 }
555
556 log.Println("listening on", *listen)
557
558 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
559 log.Fatal("Error in ListenAndServe:", err)
560 }
561}
Note: See TracBrowser for help on using the repository browser.