{-# LANGUAGE DeriveDataTypeable #-}

module Text.HTML.TagSoup.Options where

import Data.Typeable
import Text.HTML.TagSoup.Type
import Text.HTML.TagSoup.Entity
import Text.StringLike


-- | These options control how 'parseTags' works. The 'ParseOptions' type is usually generated by one of
--   'parseOptions', 'parseOptionsFast' or 'parseOptionsEntities', then selected fields may be overriden.
--
--   The options 'optTagPosition' and 'optTagWarning' specify whether to generate
--   'TagPosition' or 'TagWarning' elements respectively. Usually these options should be set to @False@
--   to simplify future stages, unless you rely on position information or want to give malformed HTML
--   messages to the end user.
--
--   The options 'optEntityData' and 'optEntityAttrib' control how entities, for example @ @ are handled.
--   Both take a string, and a boolean, where @True@ indicates that the entity ended with a semi-colon @;@.
--   Inside normal text 'optEntityData' will be called, and the results will be inserted in the tag stream.
--   Inside a tag attribute 'optEntityAttrib' will be called, and the first component of the result will be used
--   in the attribute, and the second component will be appended after the 'TagOpen' value (usually the second
--   component is @[]@). As an example, to not decode any entities, pass:
--
-- > parseOptions
-- >     {optEntityData=\(str,b) -> [TagText $ "&" ++ str ++ [';' | b]]
-- >     ,optEntityAttrib\(str,b) -> ("&" ++ str ++ [';' | b], [])

--   The 'optTagTextMerge' value specifies if you always want adjacent 'TagText' values to be merged.
--   Merging adjacent pieces of text has a small performance penalty, but will usually make subsequent analysis
--   simpler. Contiguous runs of characters without entities or tags will also be generated as single 'TagText'
--   values.
data ParseOptions str = ParseOptions
    {forall str. ParseOptions str -> Bool
optTagPosition :: Bool -- ^ Should 'TagPosition' values be given before some items (default=False,fast=False).
    ,forall str. ParseOptions str -> Bool
optTagWarning :: Bool  -- ^ Should 'TagWarning' values be given (default=False,fast=False)
    ,forall str. ParseOptions str -> (str, Bool) -> [Tag str]
optEntityData :: (str,Bool) -> [Tag str] -- ^ How to lookup an entity (Bool = has ending @';'@)
    ,forall str. ParseOptions str -> (str, Bool) -> (str, [Tag str])
optEntityAttrib :: (str,Bool) -> (str,[Tag str]) -- ^ How to lookup an entity in an attribute (Bool = has ending @';'@?)
    ,forall str. ParseOptions str -> Bool
optTagTextMerge :: Bool -- ^ Require no adjacent 'TagText' values (default=True,fast=False)
    }
    deriving Typeable


-- | A 'ParseOptions' structure using a custom function to lookup attributes. Any attribute
--   that is not found will be left intact, and a 'TagWarning' given (if 'optTagWarning' is set).
--
--   If you do not want to resolve any entities, simpliy pass @const Nothing@ for the lookup function.
parseOptionsEntities :: StringLike str => (str -> Maybe str) -> ParseOptions str
parseOptionsEntities :: forall str.
StringLike str =>
(str -> Maybe str) -> ParseOptions str
parseOptionsEntities str -> Maybe str
lookupEntity = forall str.
Bool
-> Bool
-> ((str, Bool) -> [Tag str])
-> ((str, Bool) -> (str, [Tag str]))
-> Bool
-> ParseOptions str
ParseOptions Bool
False Bool
False (str, Bool) -> [Tag str]
entityData (str, Bool) -> (str, [Tag str])
entityAttrib Bool
True
    where
        entityData :: (str, Bool) -> [Tag str]
entityData (str, Bool)
x = forall str. str -> Tag str
TagText str
a forall a. a -> [a] -> [a]
: [Tag str]
b
            where (str
a,[Tag str]
b) = (str, Bool) -> (str, [Tag str])
entityAttrib (str, Bool)
x

        entityAttrib :: (str, Bool) -> (str, [Tag str])
entityAttrib ~(str
x,Bool
b) =
            let x' :: str
x' = str
x forall a. StringLike a => a -> a -> a
`append` forall a. IsString a => String -> a
fromString [Char
';'|Bool
b]
            in case str -> Maybe str
lookupEntity str
x' of
                Just str
y -> (str
y, [])
                Maybe str
Nothing -> (forall a. StringLike a => Char -> a
fromChar Char
'&' forall a. StringLike a => a -> a -> a
`append` str
x'
                           ,[forall str. str -> Tag str
TagWarning forall a b. (a -> b) -> a -> b
$ forall a. IsString a => String -> a
fromString String
"Unknown entity: " forall a. StringLike a => a -> a -> a
`append` str
x])


-- | The default parse options value, described in 'ParseOptions'. Equivalent to
--   @'parseOptionsEntities' 'lookupEntity'@.
parseOptions :: StringLike str => ParseOptions str
parseOptions :: forall str. StringLike str => ParseOptions str
parseOptions = forall str.
StringLike str =>
(str -> Maybe str) -> ParseOptions str
parseOptionsEntities forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. IsString a => String -> a
fromString forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
lookupEntity forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. StringLike a => a -> String
toString


-- | A 'ParseOptions' structure optimised for speed, following the fast options.
parseOptionsFast :: StringLike str => ParseOptions str
parseOptionsFast :: forall str. StringLike str => ParseOptions str
parseOptionsFast = forall str. StringLike str => ParseOptions str
parseOptions{optTagTextMerge :: Bool
optTagTextMerge=Bool
False}


-- | Change the underlying string type of a 'ParseOptions' value.
fmapParseOptions :: (StringLike from, StringLike to) => ParseOptions from -> ParseOptions to
fmapParseOptions :: forall from to.
(StringLike from, StringLike to) =>
ParseOptions from -> ParseOptions to
fmapParseOptions (ParseOptions Bool
a Bool
b (from, Bool) -> [Tag from]
c (from, Bool) -> (from, [Tag from])
d Bool
e) = forall str.
Bool
-> Bool
-> ((str, Bool) -> [Tag str])
-> ((str, Bool) -> (str, [Tag str]))
-> Bool
-> ParseOptions str
ParseOptions Bool
a Bool
b forall {b} {a}.
(StringLike b, StringLike a) =>
(a, Bool) -> [Tag b]
c2 forall {a} {b} {a}.
(StringLike a, StringLike b, StringLike a) =>
(a, Bool) -> (a, [Tag b])
d2 Bool
e
    where
        c2 :: (a, Bool) -> [Tag b]
c2 ~(a
x,Bool
y) = forall a b. (a -> b) -> [a] -> [b]
map (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (StringLike a, StringLike b) => a -> b
castString) forall a b. (a -> b) -> a -> b
$ (from, Bool) -> [Tag from]
c (forall a b. (StringLike a, StringLike b) => a -> b
castString a
x, Bool
y)
        d2 :: (a, Bool) -> (a, [Tag b])
d2 ~(a
x,Bool
y) = (forall a b. (StringLike a, StringLike b) => a -> b
castString from
r, forall a b. (a -> b) -> [a] -> [b]
map (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (StringLike a, StringLike b) => a -> b
castString) [Tag from]
s)
            where (from
r,[Tag from]
s) = (from, Bool) -> (from, [Tag from])
d (forall a b. (StringLike a, StringLike b) => a -> b
castString a
x, Bool
y)