{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Text.HTML.TagStream
  ( Token (..)
  , tokenStream
  ) where

import           Control.Applicative
import           Control.Monad (unless)
import           Control.Monad.Trans.Resource (MonadThrow)
import           Data.Char
import qualified Data.Conduit.List as CL

import           Data.Attoparsec.Text
import           Data.Conduit
import qualified Data.Conduit.Attoparsec as CA
import           Data.Maybe (fromMaybe, isJust)
import           Data.Monoid ((<>))
import           Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
import qualified Text.XML.Stream.Parse as XML
import           Data.Map (Map)
import qualified Data.Map.Strict as Map
import           Control.Arrow (first)

data Token
  = TagOpen Text (Map Text Text) Bool
  | TagClose Text
  | Text Text
  | Comment Text
  | Special Text Text
  | Incomplete Text
  deriving (Token -> Token -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)

data TagType
  = TagTypeClose
  | TagTypeSpecial
  | TagTypeNormal

{--
 - match quoted string, can fail.
 -}
quoted :: Char -> Parser Text
quoted :: Char -> Parser Text
quoted Char
q = Text -> Text -> Text
T.append forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'\\',Char
q))
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
""
                      forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\\' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser Text -> Parser Text
atLeast Int
1 (Char -> Parser Text
quoted Char
q) )

quotedOr :: Parser Text -> Parser Text
quotedOr :: Parser Text -> Parser Text
quotedOr Parser Text
p = forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy (forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'"',Char
'\''))) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
             forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Text
p Char -> Parser Text
quoted

{--
 - attribute value, can't fail.
 -}
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = Parser Text -> Parser Text
quotedOr forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser Text
takeTill ((forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - attribute name, at least one char, can fail when meet tag end.
 - might match self-close tag end "/>" , make sure match `tagEnd' first.
 -}
attrName :: Parser Text
attrName :: Parser Text
attrName = Parser Text -> Parser Text
quotedOr forall a b. (a -> b) -> a -> b
$
             Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (forall a. Eq a => a -> a -> Bool
/=Char
'>')
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'/',Char
'>',Char
'=') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - tag end, return self-close or not, can fail.
 -}
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char Char
'>' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
     forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"/>" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True

{--
 - attribute pair or tag end, can fail if tag end met.
 -}
attr :: Parser (Text, Text)
attr :: Parser (Text, Text)
attr = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
attrName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
           forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char Char
'=') forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                 forall a. a -> a -> Bool -> a
cond (Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
attrValue)
                      (forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"")
               )

{--
 - all attributes before tag end. can't fail.
 -}
attrs :: Parser (Map Text Text, Bool)
attrs :: Parser (Map Text Text, Bool)
attrs = Map Text Text -> Parser (Map Text Text, Bool)
loop forall k a. Map k a
Map.empty
  where
    loop :: Map Text Text -> Parser (Map Text Text, Bool)
loop Map Text Text
acc = Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Text, Text)
attr) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
               forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
                 (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map Text Text
acc,))
                 (\(Text
key, Text
value) -> Map Text Text -> Parser (Map Text Text, Bool)
loop forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
key Text
value Map Text Text
acc)

{--
 - comment tag without prefix.
 -}
comment :: Parser Token
comment :: Parser Token
comment = Text -> Token
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
comment'
  where comment' :: Parser Text
comment' = Text -> Text -> Text
T.append forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'-')
                            forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Text -> Parser Text
string Text
"-->" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
                              forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser Text -> Parser Text
atLeast Int
1 Parser Text
comment' )

{--
 - tags begine with <! , e.g. <!DOCTYPE ...>
 -}
special :: Parser Token
special :: Parser Token
special = Text -> Text -> Token
Special
          forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((forall a. Eq a => a -> a -> Bool
==Char
'-') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
                       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
                       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace )
          forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'

{--
 - parse a tag, can fail.
 -}
tag :: Parser Token
tag :: Parser Token
tag = do
    TagType
t <-     Text -> Parser Text
string Text
"/" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
         forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"!" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
         forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
    case TagType
t of
        TagType
TagTypeClose ->
            Text -> Token
TagClose forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'>')
            forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
        TagType
TagTypeSpecial -> forall a. Parser a -> Parser Bool
boolP (Text -> Parser Text
string Text
"--") forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                          forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
        TagType
TagTypeNormal -> do
            Text
name <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'<',Char
'>',Char
'/') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
            (Map Text Text
as, Bool
close) <- Parser (Map Text Text, Bool)
attrs
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Map Text Text -> Bool -> Token
TagOpen Text
name (forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
as) Bool
close

{--
 - record incomplete tag for streamline processing.
 -}
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = Text -> Token
Incomplete forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Text
T.cons Char
'<' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText

{--
 - parse text node. consume at least one char, to make sure progress.
 -}
text :: Parser Token
text :: Parser Token
text = Text -> Token
Text forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser Text -> Parser Text
atLeast Int
1 ((Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'<'))

decodeEntity :: MonadThrow m => Text -> m Text
decodeEntity :: forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity Text
entity =
             forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit
           forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [Text
"&",Text
entity,Text
";"]
#if MIN_VERSION_xml_conduit(1,9,0)
          forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text Event m ()
XML.parseText forall a. Default a => a
XML.def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
#else
          .| XML.parseText' XML.def { XML.psDecodeEntities = XML.decodeHtmlEntities }
#endif
          forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content

token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
    forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text

{--
 - treat script tag specially, can't fail.
 -}
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd Token
open =
    Builder -> Parser [Token]
loop forall a. Monoid a => a
mempty
  where
    loop :: Builder -> Parser [Token]
loop Builder
acc = do
      Text
chunk <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
== Char
'<')
      let acc' :: Builder
acc' = Builder
acc forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
chunk
          finish :: Parser [Token]
finish = forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
open, Text -> Token
Text forall a b. (a -> b) -> a -> b
$ Text -> Text
L.toStrict forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
acc', Text -> Token
TagClose Text
"script"]
          hasContent :: Parser [Token]
hasContent = (Text -> Parser Text
string Text
"/script>" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
finish) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser [Token]
loop (Builder
acc' forall a. Semigroup a => a -> a -> a
<> Builder
"<")
      (Char -> Parser Char
char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
hasContent) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser [Token]
finish

tokens :: Parser [Token]
tokens :: Parser [Token]
tokens = do
  Token
t <- Parser Token
token
  case Token
t of
    TagOpen Text
"script" Map Text Text
_ Bool
False -> Token -> Parser [Token]
tillScriptEnd Token
t
    Text Text
text0 -> do
      let parseText :: Parser Text
parseText = do
            Text Text
text <- Parser Token
token
            forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
text
      [Text]
texts <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Text
parseText
      forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text -> Token
Text forall a b. (a -> b) -> a -> b
$ Text -> Text
decodeString forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ Text
text0 forall a. a -> [a] -> [a]
: [Text]
texts]
    Token
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
t]

{--
 - Utils {{{
 -}

atLeast :: Int -> Parser Text -> Parser Text
atLeast :: Int -> Parser Text -> Parser Text
atLeast Int
0 Parser Text
p = Parser Text
p
atLeast Int
n Parser Text
p = Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser Text -> Parser Text
atLeast (Int
nforall a. Num a => a -> a -> a
-Int
1) Parser Text
p

cond :: a -> a -> Bool -> a
cond :: forall a. a -> a -> Bool -> a
cond a
a1 a
a2 Bool
b = if Bool
b then a
a1 else a
a2

(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
(||.) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)

in2 :: Eq a => (a,a) -> a -> Bool
in2 :: forall a. Eq a => (a, a) -> a -> Bool
in2 (a
a1,a
a2) a
a = a
aforall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a2

in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: forall a. Eq a => (a, a, a) -> a -> Bool
in3 (a
a1,a
a2,a
a3) a
a = a
aforall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a3

boolP :: Parser a -> Parser Bool
boolP :: forall a. Parser a -> Parser Bool
boolP Parser a
p = Parser a
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False

maybeP :: Parser a -> Parser (Maybe a)
maybeP :: forall a. Parser a -> Parser (Maybe a)
maybeP Parser a
p = forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
-- }}}

-- {{{ Stream
tokenStream :: Monad m
            => ConduitT Text Token m ()
tokenStream :: forall (m :: * -> *). Monad m => ConduitT Text Token m ()
tokenStream =
    forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
CL.filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
(Monad m, AttoparsecInput a) =>
Parser a b
-> ConduitT a (Either ParseError (PositionRange, b)) m ()
CA.conduitParserEither Parser [Token]
tokens forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
Monad m =>
(a -> [b]) -> ConduitT a b m ()
CL.concatMap forall {a} {a} {a}. Show a => Either a (a, a) -> a
go
  where
    go :: Either a (a, a) -> a
go (Left a
e) = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"html-conduit: parse error that should never happen occurred! " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show a
e
    go (Right (a
_, a
tokens')) = a
tokens'

splitAccum :: [Token] -> (Text, [Token])
splitAccum :: [Token] -> (Text, [Token])
splitAccum [] = (forall a. Monoid a => a
mempty, [])
splitAccum (forall a. [a] -> [a]
reverse -> (Incomplete Text
s : [Token]
xs)) = (Text
s, forall a. [a] -> [a]
reverse [Token]
xs)
splitAccum [Token]
tokens = (forall a. Monoid a => a
mempty, [Token]
tokens)

-- Entities

-- | A conduit to decode entities from a stream of tokens into a new stream of tokens.
decodeEntities :: Monad m => ConduitT Token Token m ()
decodeEntities :: forall (m :: * -> *). Monad m => ConduitT Token Token m ()
decodeEntities =
    ConduitT Token Token m ()
start
  where
    start :: ConduitT Token Token m ()
start = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\Token
token' -> forall {m :: * -> *}. Monad m => Token -> ConduitT Token Token m ()
start' Token
token' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Token m ()
start)
    start' :: Token -> ConduitT Token Token m ()
start' (Text Text
t) = (forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *). Monad m => ConduitT Token Text m ()
yieldWhileText) forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *). Monad m => ConduitT Text Text m ()
decodeEntities' forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
Monad m =>
(a -> Maybe b) -> ConduitT a b m ()
CL.mapMaybe Text -> Maybe Token
go
    start' (TagOpen Text
name Map Text Text
attrs' Bool
bool) = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Text -> Map Text Text -> Bool -> Token
TagOpen Text
name (forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
attrs') Bool
bool)
    start' Token
token' = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Token
token'

    go :: Text -> Maybe Token
go Text
t
        | Text
t forall a. Eq a => a -> a -> Bool
== Text
""   = forall a. Maybe a
Nothing
        | Bool
otherwise = forall a. a -> Maybe a
Just (Text -> Token
Text Text
t)

-- | Decode entities in a complete string.
decodeString :: Text -> Text
decodeString :: Text -> Text
decodeString Text
input =
  case Text -> (Text, Text)
makeEntityDecoder Text
input of
    (Text
value', Text
remainder)
      | Text
value' forall a. Eq a => a -> a -> Bool
/= forall a. Monoid a => a
mempty -> Text
value' forall a. Semigroup a => a -> a -> a
<> Text -> Text
decodeString Text
remainder
      | Bool
otherwise -> Text
input

decodeEntities' :: Monad m => ConduitT Text Text m ()
decodeEntities' :: forall (m :: * -> *). Monad m => ConduitT Text Text m ()
decodeEntities' =
    forall {m :: * -> *}.
Monad m =>
(Text -> Text) -> ConduitT Text Text m ()
loop forall a. a -> a
id
  where
    loop :: (Text -> Text) -> ConduitT Text Text m ()
loop Text -> Text
accum = do
        Maybe Text
mchunk <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
        let chunk :: Text
chunk = Text -> Text
accum forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe forall a. Monoid a => a
mempty Maybe Text
mchunk
            (Text
newStr, Text
remainder) = Text -> (Text, Text)
makeEntityDecoder Text
chunk
        forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
newStr
        if forall a. Maybe a -> Bool
isJust Maybe Text
mchunk
            then (Text -> Text) -> ConduitT Text Text m ()
loop (forall a. Monoid a => a -> a -> a
mappend Text
remainder)
            else forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
remainder

-- | Yield contiguous text tokens as strings.
yieldWhileText :: Monad m => ConduitT Token Text m ()
yieldWhileText :: forall (m :: * -> *). Monad m => ConduitT Token Text m ()
yieldWhileText =
    ConduitT Token Text m ()
loop
  where
    loop :: ConduitT Token Text m ()
loop = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) Token -> ConduitT Token Text m ()
go
    go :: Token -> ConduitT Token Text m ()
go (Text Text
t) = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Text m ()
loop
    go Token
token' = forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Token
token'

-- | Decode the entities in a string type with a decoder.
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder = forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Text -> Text
L.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
B.toLazyText) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> (Builder, Text)
go
  where
    go :: Text -> (Builder, Text)
go Text
s =
      case (Char -> Bool) -> Text -> (Text, Text)
T.break (forall a. Eq a => a -> a -> Bool
==Char
'&') Text
s of
        (Text
_,Text
"") -> (Text -> Builder
B.fromText Text
s, Text
"")
        (Text
before,restPlusAmp :: Text
restPlusAmp@(Int -> Text -> Text
T.drop Int
1 -> Text
rest)) ->
          case (Char -> Bool) -> Text -> (Text, Text)
T.break (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char -> Bool
isNameChar Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'#')) Text
rest of
            (Text
_,Text
"") -> (Text -> Builder
B.fromText Text
before, Text
restPlusAmp)
            (Text
entity,Text
after) -> (Builder
before1 forall a. Semigroup a => a -> a -> a
<> Builder
before2, Text
after')
              where
                before1 :: Builder
before1 = Text -> Builder
B.fromText Text
before
                (Builder
before2, Text
after') =
                  case Maybe Text
mdecoded of
                    Maybe Text
Nothing -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((Builder
"&" forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
entity) forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
                    Just (Text -> Builder
B.fromText -> Builder
decoded) ->
                      case Text -> Maybe (Char, Text)
T.uncons Text
after of
                        Just (Char
';',Text
validAfter) -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
validAfter)
                        Just (Char
_invalid,Text
_rest) -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
                        Maybe (Char, Text)
Nothing -> (forall a. Monoid a => a
mempty, Text
s)
                mdecoded :: Maybe Text
mdecoded =
                  if Text
entity forall a. Eq a => a -> a -> Bool
== forall a. Monoid a => a
mempty
                     then forall a. Maybe a
Nothing
                     else forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity Text
entity

-- | Is the character a valid Name starter?
isNameStart :: Char -> Bool
isNameStart :: Char -> Bool
isNameStart Char
c =
  Char
c forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
||
  Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
||
  Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
||
  Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD6') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xF6') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x37D') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x200D') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x218F') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- | Is the character valid in a Name?
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c =
  Char
c forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
||
  Char
c forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
||
  Char
c forall a. Eq a => a -> a -> Bool
== Char
'\xB7' Bool -> Bool -> Bool
||
  Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
||
  Char -> Bool
isNameStart Char
c Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x0300' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x036F') Bool -> Bool -> Bool
||
  (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')