From 97492adcb97a1c99fbb0bb269628e49a7ebb5956 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Hamal=20Dvo=C5=99=C3=A1k?= <mordae@anilinux.org>
Date: Wed, 2 Jun 2021 15:52:33 +0200
Subject: [PATCH] Fix & improve media handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jan Hamal Dvořák <mordae@anilinux.org>
---
 hikaru.cabal           |  1 +
 lib/Hikaru/Action.hs   | 53 +++++++++++++++++++++++--------
 lib/Hikaru/Dispatch.hs | 25 ++++++++-------
 lib/Hikaru/Media.hs    | 72 +++++++++++++++++++++++++-----------------
 lib/Hikaru/Route.hs    | 42 ++++++++++++------------
 5 files changed, 119 insertions(+), 74 deletions(-)

diff --git a/hikaru.cabal b/hikaru.cabal
index aeeb1dc..1107d44 100644
--- a/hikaru.cabal
+++ b/hikaru.cabal
@@ -72,6 +72,7 @@ common common
 
     build-depends:
       , aeson              >=1.4  && <1.6
+      , attoparsec         >=0.13 && <0.15
       , base               >=4.13 && <5
       , binary             >=0.8  && <0.9
       , bytestring         >=0.10 && <0.12
diff --git a/lib/Hikaru/Action.hs b/lib/Hikaru/Action.hs
index d7242b9..17f0289 100644
--- a/lib/Hikaru/Action.hs
+++ b/lib/Hikaru/Action.hs
@@ -361,49 +361,76 @@ where
 
 
   -- |
-  -- Obtain the Accept header value or the default value of @\"*/*\"@.
+  -- Obtain the 'hAccept' header value or the default value of @\"*/*\"@.
+  --
+  -- Aborts with 'badRequest400' if the header fails to parse.
   --
   getAccept :: (MonadAction m) => m [Media]
-  getAccept = parseMedia <$> cs . fromMaybe "*/*"
-                         <$> getHeaderMaybe hAccept
+  getAccept = do
+    value <- getHeaderDefault hAccept "*/*"
+
+    case parseMedia (cs value) of
+      Left _reason -> abort badRequest400 [] "Failed to parse Accept."
+      Right media  -> return media
 
 
   -- |
   -- Obtain the Accept-Charset header value or the default value of @\"*\"@.
   --
+  -- Aborts with 'badRequest400' if the header fails to parse.
+  --
   getAcceptCharset :: (MonadAction m) => m [Media]
-  getAcceptCharset = parseMedia <$> cs . fromMaybe "*"
-                                <$> getHeaderMaybe hAcceptCharset
+  getAcceptCharset = do
+    value <- getHeaderDefault hAcceptCharset "*"
+
+    case parseMedia (cs value) of
+      Left _reason -> abort badRequest400 [] "Failed to parse Accept-Charset."
+      Right media  -> return media
 
 
   -- |
   -- Obtain the Accept-Encoding header value or the default
   -- value of @\"identity,*;q=0\"@.
   --
+  -- Aborts with 'badRequest400' if the header fails to parse.
+  --
   getAcceptEncoding :: (MonadAction m) => m [Media]
-  getAcceptEncoding = parseMedia <$> cs . fromMaybe "identity,*;q=0"
-                                 <$> getHeaderMaybe hAcceptEncoding
+  getAcceptEncoding = do
+    value <- getHeaderDefault hAcceptEncoding "identity,*;q=0"
+
+    case parseMedia (cs value) of
+      Left _reason -> abort badRequest400 [] "Failed to parse Accept-Encoding."
+      Right media  -> return media
 
 
   -- |
   -- Obtain the Accept-Language header value or the default value of @\"*\"@.
   --
+  -- Aborts with 'badRequest400' if the header fails to parse.
+  --
   getAcceptLanguage :: (MonadAction m) => m [Media]
-  getAcceptLanguage = parseMedia <$> cs . fromMaybe "*/*"
-                                 <$> getHeaderMaybe hAcceptLanguage
+  getAcceptLanguage = do
+    value <- getHeaderDefault hAcceptLanguage "*"
+
+    case parseMedia (cs value) of
+      Left _reason -> abort badRequest400 [] "Failed to parse Accept-Language."
+      Right media  -> return media
 
 
   -- |
   -- Obtain the Content-Type header value or the default value of
   -- @\"application/octet-stream\"@ (always true, but meaningless).
   --
+  -- Aborts with 'badRequest400' if the header fails to parse.
+  --
   getContentType :: (MonadAction m) => m Media
   getContentType = do
-    media <- fmap parseMedia <$> fmap cs <$> getHeaderMaybe hContentType
+    value <- getHeaderDefault hContentType "application/octet-stream"
 
-    case media of
-      Just (x:_) -> return x
-      _else      -> return "application/octet-stream"
+    case parseMedia (cs value) of
+      Left _reason -> abort badRequest400 [] "Failed to parse Content-Type."
+      Right []     -> abort badRequest400 [] "Empty Content-Type."
+      Right (m:_)  -> return m
 
 
   -- |
diff --git a/lib/Hikaru/Dispatch.hs b/lib/Hikaru/Dispatch.hs
index 5a8c0d6..f19237e 100644
--- a/lib/Hikaru/Dispatch.hs
+++ b/lib/Hikaru/Dispatch.hs
@@ -138,21 +138,24 @@ where
 
 
   err :: Score -> Application
-  err BadRequest           = respond status400
-  err NotFound             = respond status404
-  err MethodNotAllowed     = respond status405
-  err UpgradeRequired      = respond status426
-  err NotAcceptable        = respond status406
-  err LengthRequired       = respond status411
-  err UnsupportedMediaType = respond status415
+  err (BadRequest reason)  = respond status400 (Just reason)
+  err NotFound             = respond status404 Nothing
+  err MethodNotAllowed     = respond status405 Nothing
+  err UpgradeRequired      = respond status426 Nothing
+  err NotAcceptable        = respond status406 Nothing
+  err LengthRequired       = respond status411 Nothing
+  err UnsupportedMediaType = respond status415 Nothing
   err (Suitable _)         = error "BUG: errored out with a Suitable"
 
 
-  respond :: Status -> Application
-  respond st@Status{..} _ sink = sink $ responseLBS st hdr msg
+  respond :: Status -> Maybe Text -> Application
+  respond st@Status{..} msg _ sink = sink $ responseLBS st hdr msg'
     where
-      hdr = [(hContentType, "text/plain")]
-      msg = cs (show statusCode) <> " " <> cs statusMessage
+      hdr  = [(hContentType, "text/plain")]
+
+      msg' = case msg of
+               Nothing -> cs (show statusCode) <> " " <> cs statusMessage
+               Just m  -> cs m
 
 
   handlerMW :: (h -> Application) -> [(Int, Response -> h)] -> Middleware
diff --git a/lib/Hikaru/Media.hs b/lib/Hikaru/Media.hs
index 028d13d..5ba79c3 100644
--- a/lib/Hikaru/Media.hs
+++ b/lib/Hikaru/Media.hs
@@ -25,13 +25,12 @@ module Hikaru.Media
   , selectMedia
   )
 where
-  import Praha hiding (many)
-
-  import Data.Text (toLower)
+  import Praha
 
   import Data.List (filter, lookup, sortOn)
-  import Text.ParserCombinators.ReadP
-  import Data.Char (isControl, isSpace)
+
+  import Data.Char
+  import Data.Attoparsec.Text
 
 
   -- |
@@ -57,13 +56,13 @@ where
   --
   instance IsString Media where
     fromString str = case parseMedia (cs str) of
-                       m:_   -> m
-                       _else -> error $ "Failed to parse media " <> show str
+                       Left reason -> error reason
+                       Right []    -> error "no media given"
+                       Right (m:_) -> m
 
 
   -- |
   -- Try to parse a comma-separated media type list.
-  -- Media that fail to parse are simply omitted.
   --
   -- Example:
   --
@@ -72,24 +71,21 @@ where
   -- , Media { mainType = "text", subType = "plain", quality = 0.7, params = [] }
   -- ]
   --
-  parseMedia :: Text -> [Media]
-  parseMedia text = case readP_to_S pMediaList (cs (toLower text)) of
-                      (m, ""):_ -> sortOn (negate . quality) m
-                      _else     -> []
+  parseMedia :: Text -> Either String [Media]
+  parseMedia = parseOnly (pMediaList <* endOfInput)
 
 
   -- |
   -- Parser for the media list coded mostly to the RFC 2045.
-  -- Input is always lowercased and unicode is accepted.
   --
-  pMediaList :: ReadP [Media]
-  pMediaList = sepBy pMedia pSeparator <* eof
+  pMediaList :: Parser [Media]
+  pMediaList = pMedia `sepBy` pSeparator
     where
+      pMedia :: Parser Media
       pMedia = do
-        mainType <- cs <$> pToken
-        _        <- char '/'
-        subType  <- cs <$> pToken
-        params   <- many pParameter
+        mainType <- pToken
+        subType  <- (char '/' *> pToken) <|> string ""
+        params   <- many' pParameter
 
         let quality = fromMaybe 1.0 do
                         q <- lookup "q" params
@@ -97,24 +93,42 @@ where
 
         return Media{..}
 
+      pParameter :: Parser (Text, Text)
       pParameter = do
         _     <- pSpaced $ char ';'
-        name  <- cs <$> pToken
+        name  <- pToken
         _     <- pSpaced $ char '='
-        value <- cs <$> pValue
+        value <- pValue
         return (name, value)
 
+      pToken :: Parser Text
+      pToken = do
+        pSpaced $ takeTill isSpecial
+
+      pSeparator :: Parser Char
       pSeparator = pSpaced $ char ','
-      pToken     = pSpaced $ many1 (satisfy (not . quote))
-      pValue     = pToken <++ pQuotedStr
-      pQuotedStr = pSpaced $ pQuoted $ many (pExcept '\\')
 
-      pExcept c  = satisfy (c /=)
-      pSpaced p  = skipSpaces *> p <* skipSpaces
-      pQuoted p  = char '"' *> p <* char '"'
+      pValue :: Parser Text
+      pValue = pToken <|> pQuotedStr
+
+      pQuotedStr :: Parser Text
+      pQuotedStr = pSpaced $ pQuoted $ takeTill (== '\\')
+
+      pSpaced :: Parser a -> Parser a
+      pSpaced p = skipSpace *> p <* skipSpace
+
+      pQuoted :: Parser a -> Parser a
+      pQuoted p = char '"' *> p <* char '"'
 
-      quote c    = isControl c || isSpace c || c `elem` specials
-      specials   = "()<>@,;:\\\"/[]?=" :: [Char]
+      isSpecial :: (Char -> Bool)
+      isSpecial c = isControl c || isSpace c
+                    || c == '(' || c == ')'
+                    || c == '<' || c == '>'
+                    || c == '@' || c == ',' || c == ';'
+                    || c == ':' || c == '\\'
+                    || c == '"' || c == '/'
+                    || c == '[' || c == ']'
+                    || c == '?' || c == '='
 
 
   -- |
diff --git a/lib/Hikaru/Route.hs b/lib/Hikaru/Route.hs
index a51b7b4..6899bd4 100644
--- a/lib/Hikaru/Route.hs
+++ b/lib/Hikaru/Route.hs
@@ -125,7 +125,7 @@ where
   -- route across multiple appraisals.
   --
   data Score
-    = BadRequest
+    = BadRequest Text
     | NotFound
     | MethodNotAllowed
     | UpgradeRequired
@@ -351,11 +351,11 @@ where
   acceptContent media = Appraisal {vary = [hContentType], score}
     where
       score req = do
-        let header = parseMedia (cs $ getContentType req)
-
-        case selectMedia media header of
-          Just Media{..} -> Suitable quality
-          Nothing        -> UnsupportedMediaType
+        case parseMedia (cs $ getContentType req) of
+          Left _reason -> BadRequest "Failed to parse Content-Type."
+          Right header -> case selectMedia media header of
+                            Just Media{..} -> Suitable quality
+                            Nothing        -> UnsupportedMediaType
 
 
   -- |
@@ -398,11 +398,11 @@ where
   offerContent media = Appraisal {vary = [hAccept], score}
     where
       score req = do
-        let header = parseMedia (cs $ getAccept req)
-
-        case selectMedia media header of
-          Just Media{..} -> Suitable quality
-          Nothing        -> NotAcceptable
+        case parseMedia (cs $ getAccept req) of
+          Left _reason -> BadRequest "Failed to parse Accept."
+          Right header -> case selectMedia media header of
+                            Just Media{..} -> Suitable quality
+                            Nothing        -> NotAcceptable
 
 
   -- |
@@ -448,11 +448,11 @@ where
   offerEncoding media = Appraisal {vary = [hAcceptEncoding], score}
     where
       score req = do
-        let header = parseMedia (cs $ getAcceptEncoding req)
-
-        case selectMedia media header of
-          Just Media{..} -> Suitable quality
-          Nothing        -> NotAcceptable
+        case parseMedia (cs $ getAcceptEncoding req) of
+          Left _reason -> BadRequest "Failed to parse Accept-Encoding."
+          Right header -> case selectMedia media header of
+                            Just Media{..} -> Suitable quality
+                            Nothing        -> NotAcceptable
 
 
   -- |
@@ -465,11 +465,11 @@ where
   offerLanguage media = Appraisal {vary = [hAcceptLanguage], score}
     where
       score req = do
-        let header = parseMedia (cs $ getAcceptLanguage req)
-
-        case selectMedia media header of
-          Just Media{..} -> Suitable quality
-          Nothing        -> NotAcceptable
+        case parseMedia (cs $ getAcceptLanguage req) of
+          Left _reason -> BadRequest "Failed to parse Accept-Language."
+          Right header -> case selectMedia media header of
+                            Just Media{..} -> Suitable quality
+                            Nothing        -> NotAcceptable
 
 
   -- Request Utilities -------------------------------------------------------
-- 
GitLab