{-# LANGUAGE CPP #-}
{- |
Unicode aware URI encoding and decoding functions for both String and Text.
Although standards are pretty vague about Unicode in URIs most browsers are
pretty straightforward when encoding URIs: Encode a text to a UTF-8 string and
URI-encode every individual byte (not character).
-}

module Network.URI.Encode
  ( encode
  , encodeWith
  , encodeText
  , encodeTextWith
  , encodeTextToBS
  , encodeTextToBSWith
  , encodeByteString
  , encodeByteStringWith

  , decode
  , decodeText
  , decodeBSToText
  , decodeByteString

  , isAllowed
  ) where

import Data.Text
import Network.URI
import qualified Data.ByteString.Char8 as U
import qualified Data.ByteString.UTF8  as U

-------------------------------------------------------------------------------
-- | URI-encode a 'String', Unicode aware.
--
-- This is 'encodeWith' specialised to the 'isAllowed' predicate: every
-- character rejected by 'isAllowed' is escaped.
encode :: String -> String
encode = encodeWith isAllowed

-- | URI-encode a 'String', Unicode aware, using a caller-supplied predicate
-- to decide which characters are escaped.
--
-- Characters for which the predicate returns 'True' are kept unchanged;
-- characters for which it returns 'False' are escaped. The input is first
-- passed through @fixUtf8@ and then handed to 'escapeURIString'.
encodeWith :: (Char -> Bool) -> String -> String
encodeWith predicate = escapeURIString predicate . fixUtf8

-- | URI-decode a 'String', Unicode aware.
--
-- The escapes are resolved with 'unEscapeString' and the result is then
-- passed through @unfixUtf8@.
decode :: String -> String
decode = unfixUtf8 . unEscapeString

-------------------------------------------------------------------------------
-- | URI-encode a 'Text', Unicode aware.
--
-- The text is unpacked to a 'String', encoded with 'encode' and packed
-- back into a 'Text'.
encodeText :: Text -> Text
encodeText = pack . encode . unpack

-- | URI-encode a 'Text', Unicode aware, using the predicate to decide
-- which characters are escaped ('False' means escape).
--
-- The text is unpacked to a 'String', encoded with 'encodeWith' and packed
-- back into a 'Text'.
encodeTextWith :: (Char -> Bool) -> Text -> Text
encodeTextWith predicate = pack . encodeWith predicate . unpack

-- | URI-decode a 'Text', Unicode aware.
--
-- The text is unpacked to a 'String', decoded with 'decode' and packed
-- back into a 'Text'.
decodeText :: Text -> Text
decodeText = pack . decode . unpack

-------------------------------------------------------------------------------
-- | URI-encode a 'Text' into a 'U.ByteString', Unicode aware.
--
-- The text is unpacked to a 'String', encoded with 'encode', and the
-- encoded characters are packed into the resulting byte string.
encodeTextToBS :: Text -> U.ByteString
encodeTextToBS = U.pack . encode . unpack

-- | URI-encode a 'Text' into a 'U.ByteString', Unicode aware, using the
-- predicate to decide which characters are escaped ('False' means escape).
--
-- The text is unpacked to a 'String', encoded with 'encodeWith', and the
-- encoded characters are packed into the resulting byte string.
encodeTextToBSWith :: (Char -> Bool) -> Text -> U.ByteString
encodeTextToBSWith predicate = U.pack . encodeWith predicate . unpack

-- | URI-decode a 'U.ByteString' into a 'Text', Unicode aware.
--
-- The byte string is unpacked to a 'String', decoded with 'decode' and the
-- result is packed into a 'Text'.
decodeBSToText :: U.ByteString -> Text
decodeBSToText = pack . decode . U.unpack

-------------------------------------------------------------------------------
-- | URI-encode a UTF8-encoded 'U.ByteString' into a 'U.ByteString',
-- Unicode aware.
--
-- The input is converted to a 'String' with @U.toString@, encoded with
-- 'encode', and the encoded characters are packed into the result.
encodeByteString :: U.ByteString -> U.ByteString
encodeByteString = U.pack . encode . U.toString

-- | URI-encode a UTF8-encoded 'U.ByteString' into a 'U.ByteString',
-- Unicode aware, using the predicate to decide which characters are
-- escaped ('False' means escape).
--
-- The input is converted to a 'String' with @U.toString@, exactly as
-- 'encodeByteString' does, so the predicate and the escaping see whole
-- characters rather than the individual bytes of a multi-byte sequence.
encodeByteStringWith :: (Char -> Bool) -> U.ByteString -> U.ByteString
encodeByteStringWith predicate = U.pack . encodeWith predicate . U.toString

-- | URI-decode a 'U.ByteString' into a UTF8-encoded 'U.ByteString',
-- Unicode aware.
--
-- The input is unpacked to a 'String', decoded with 'decode', and the
-- decoded string is converted back with @U.fromString@.
decodeByteString :: U.ByteString -> U.ByteString
decodeByteString = U.fromString . decode . U.unpack

-------------------------------------------------------------------------------
-- | Is a character allowed in a URI without being escaped? Only ASCII
-- alphabetic characters, decimal digits, and @- _ . ~@ are allowed. This
-- follows RFC 3986.
--
-- This is 'isUnreserved' under another name; it is the predicate used by
-- 'encode'.
isAllowed :: Char -> Bool
isAllowed = isUnreserved

-------------------------------------------------------------------------------
-- | "Fix" a String before encoding. This actually breaks the string, by
-- replacing each Unicode character with the individual bytes of its UTF-8
-- encoding (one 'Char' per byte). For network \>= 2.4, this is the
-- identity, since that correctly handles unicode characters.
--
-- When the @MIN_VERSION_network@ macro is not defined at all, the identity
-- is used as well.
fixUtf8 :: String -> String
#ifdef MIN_VERSION_network
#if MIN_VERSION_network(2,4,0)
fixUtf8 = id
#else
fixUtf8 = U.unpack . U.fromString
#endif
#else
fixUtf8 = id
#endif

-- | "Unfix" a String again, reversing what @fixUtf8@ does: a string holding
-- one 'Char' per UTF-8 byte is turned back into Unicode characters. For
-- network \>= 2.4.1.1 this is the identity, since that correctly handles
-- unicode characters. Note that network 2.4.1.0 (one that is still broken)
-- cannot be excluded by CPP, so this version is excluded in the cabal
-- dependencies.
--
-- When the @MIN_VERSION_network@ macro is not defined at all, the identity
-- is used as well.
unfixUtf8 :: String -> String
#ifdef MIN_VERSION_network
#if MIN_VERSION_network(2,4,1)
unfixUtf8 = id
#else
unfixUtf8 = U.toString . U.pack
#endif
#else
unfixUtf8 = id
#endif