Changeset 63 in code


Ignore:
Timestamp:
Dec 15, 2016, 10:32:34 PM (8 years ago)
Author:
alex
Message:

[enh] parse and filter Content-Type.
svg, mathml, multipart, xml (because of namespace) are forbidden.
the charset parameter in Content-Type is only set when it is set by the original server.
for the */xhtml+* Content-Type: the conversion to UTF-8 is now done (this wasn't the case before).
string type is used because of the mime package API.

Location:
trunk
Files:
3 added
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/morty.go

    r62 r63  
    2121        "golang.org/x/net/html/charset"
    2222        "golang.org/x/text/encoding"
     23
     24        "github.com/dalf/morty/contenttype"
    2325)
    2426
     
    3436
    3537var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
     38
     39// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types
     40// https://www.w3.org/TR/2009/WD-MathML3-20090604/mathml.pdf
     41// http://planetsvg.com/tools/mime.php
     42var FORBIDDEN_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
     43        // javascript
     44        contenttype.NewFilterContains("javascript"),
     45        contenttype.NewFilterContains("ecmascript"),
     46        contenttype.NewFilterEquals("application", "js", "*"),
     47        // no xml (can contain xhtml or css)
     48        contenttype.NewFilterEquals("text", "xml", "*"),
     49        contenttype.NewFilterEquals("text", "xml-external-parsed-entity", "*"),
     50        contenttype.NewFilterEquals("application", "xml", "*"),
     51        contenttype.NewFilterEquals("application", "xml-external-parsed-entity", "*"),
     52        contenttype.NewFilterEquals("application", "xslt", "xml"),
     53        // no mathml
     54        contenttype.NewFilterEquals("application", "mathml", "xml"),
     55        contenttype.NewFilterEquals("application", "mathml-presentation", "xml"),
     56        contenttype.NewFilterEquals("application", "mathml-content", "xml"),
     57        // no svg
     58        contenttype.NewFilterEquals("image", "svg", "xml"),
     59        contenttype.NewFilterEquals("image", "svg-xml", "*"),
     60        // no cache
     61        contenttype.NewFilterEquals("text", "cache-manifest", "*"),
     62        // no multipart
     63        contenttype.NewFilterEquals("multipart", "*", "*"),
     64        // no xul
     65        contenttype.NewFilterEquals("application", "vnd.mozilla.xul", "xml"),
     66        // no htc
     67        contenttype.NewFilterEquals("text", "x-component", "*"),
     68        // no flash
     69        contenttype.NewFilterEquals("application", "x-shockwave-flash", "*"),
     70        contenttype.NewFilterEquals("video", "x-flv", ""),
     71        contenttype.NewFilterEquals("video", "vnd.sealed-swf", ""),
     72        // no formats known to have issues
     73        contenttype.NewFilterEquals("image", "wmf", "*"),
     74        contenttype.NewFilterEquals("image", "emf", "*"),
     75        // some of the microsoft and IE mime types
     76        contenttype.NewFilterEquals("text", "vbs", "*"),
     77        contenttype.NewFilterEquals("text", "vbscript", "*"),
     78        contenttype.NewFilterEquals("text", "scriptlet", "*"),
     79        contenttype.NewFilterEquals("application", "x-vbs", "*"),
     80        contenttype.NewFilterEquals("application", "olescript", "*"),
     81        contenttype.NewFilterEquals("application", "x-msmetafile", "*"),
     82        // no css (sometimes, rendering depends on the browser)
     83        contenttype.NewFilterEquals("application", "x-pointplus", "*"),
     84})
     85
     86var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
     87        "charset": true,
     88}
    3689
    3790var UNSAFE_ELEMENTS [][]byte = [][]byte{
     
    252305        }
    253306
    254         contentType := resp.Header.Peek("Content-Type")
    255 
    256         if contentType == nil {
     307        contentTypeBytes := resp.Header.Peek("Content-Type")
     308
     309        if contentTypeBytes == nil {
    257310                // HTTP status code 503 : Service Unavailable
    258311                p.serveMainPage(ctx, 503, errors.New("invalid content type"))
     
    260313        }
    261314
    262         if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
     315        contentTypeString := string(contentTypeBytes)
     316
     317        // decode Content-Type header
     318        contentType, error := contenttype.ParseContentType(contentTypeString)
     319        if error != nil {
     320                // HTTP status code 503 : Service Unavailable
     321                p.serveMainPage(ctx, 503, errors.New("invalid content type"))
     322                return
     323        }
     324
     325        // deny access to forbidden content type
     326        if FORBIDDEN_CONTENTTYPE_FILTER(contentType) {
    263327                // HTTP status code 403 : Forbidden
    264328                p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
     
    266330        }
    267331
    268         contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
    269 
     332        // HACK : replace */xhtml by text/html
     333        if contentType.SubType == "xhtml" {
     334                contentType.TopLevelType = "text"
     335                contentType.SubType = "html"
     336                contentType.Suffix = ""
     337        }
     338
     339        // conversion to UTF-8
    270340        var responseBody []byte
    271341
    272         if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
    273                 e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
     342        if contentType.TopLevelType == "text" {
     343                e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
    274344                if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
    275345                        responseBody, err = e.NewDecoder().Bytes(resp.Body())
     
    282352                        responseBody = resp.Body()
    283353                }
     354                // update the charset or specify it
     355                contentType.Parameters["charset"] = "UTF-8"
    284356        } else {
    285357                responseBody = resp.Body()
    286358        }
    287359
    288         if bytes.Contains(contentType, []byte("xhtml")) {
    289                 ctx.SetContentType("text/html; charset=UTF-8")
    290         } else {
    291                 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
    292         }
     360        //
     361        contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
     362
     363        // set the content type
     364        ctx.SetContentType(contentType.String())
    293365
    294366        switch {
    295         case bytes.Contains(contentType, []byte("css")):
     367        case contentType.SubType == "css" && contentType.Suffix == "":
    296368                sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
    297         case bytes.Contains(contentType, []byte("html")):
     369        case contentType.SubType == "html" && contentType.Suffix == "":
    298370                sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
    299371        default:
Note: See TracChangeset for help on using the changeset viewer.