{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
module Text.HTML.TagStream
( Token (..)
, tokenStream
) where
import Control.Applicative
import Control.Monad (unless)
import Control.Monad.Trans.Resource (MonadThrow)
import Data.Char
import qualified Data.Conduit.List as CL
import Data.Attoparsec.Text
import Data.Conduit
import qualified Data.Conduit.Attoparsec as CA
import Data.Maybe (fromMaybe, isJust)
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
import qualified Text.XML.Stream.Parse as XML
import Data.Map (Map)
import qualified Data.Map.Strict as Map
import Control.Arrow (first)
data Token
= TagOpen Text (Map Text Text) Bool
| TagClose Text
| Text Text
| Text
| Special Text Text
| Incomplete Text
deriving (Token -> Token -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)
data TagType
= TagTypeClose
| TagTypeSpecial
| TagTypeNormal
quoted :: Char -> Parser Text
quoted :: Char -> Parser Text
quoted Char
q = Text -> Text -> Text
T.append forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'\\',Char
q))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
""
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\\' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser Text -> Parser Text
atLeast Int
1 (Char -> Parser Text
quoted Char
q) )
quotedOr :: Parser Text -> Parser Text
quotedOr :: Parser Text -> Parser Text
quotedOr Parser Text
p = forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy (forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'"',Char
'\''))) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Text
p Char -> Parser Text
quoted
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = Parser Text -> Parser Text
quotedOr forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser Text
takeTill ((forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
attrName :: Parser Text
attrName :: Parser Text
attrName = Parser Text -> Parser Text
quotedOr forall a b. (a -> b) -> a -> b
$
Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (forall a. Eq a => a -> a -> Bool
/=Char
'>')
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'/',Char
'>',Char
'=') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char Char
'>' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"/>" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
attr :: Parser (Text, Text)
attr :: Parser (Text, Text)
attr = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
attrName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char Char
'=') forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
forall a. a -> a -> Bool -> a
cond (Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
attrValue)
(forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"")
)
attrs :: Parser (Map Text Text, Bool)
attrs :: Parser (Map Text Text, Bool)
attrs = Map Text Text -> Parser (Map Text Text, Bool)
loop forall k a. Map k a
Map.empty
where
loop :: Map Text Text -> Parser (Map Text Text, Bool)
loop Map Text Text
acc = Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Text, Text)
attr) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
(forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map Text Text
acc,))
(\(Text
key, Text
value) -> Map Text Text -> Parser (Map Text Text, Bool)
loop forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
key Text
value Map Text Text
acc)
comment :: Parser Token
= Text -> Token
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
comment'
where comment' :: Parser Text
comment' = Text -> Text -> Text
T.append forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'-')
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Text -> Parser Text
string Text
"-->" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser Text -> Parser Text
atLeast Int
1 Parser Text
comment' )
special :: Parser Token
special :: Parser Token
special = Text -> Text -> Token
Special
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((forall a. Eq a => a -> a -> Bool
==Char
'-') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill ((forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace )
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'>') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
tag :: Parser Token
tag :: Parser Token
tag = do
TagType
t <- Text -> Parser Text
string Text
"/" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"!" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
case TagType
t of
TagType
TagTypeClose ->
Text -> Token
TagClose forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'>')
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
TagType
TagTypeSpecial -> forall a. Parser a -> Parser Bool
boolP (Text -> Parser Text
string Text
"--") forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
TagType
TagTypeNormal -> do
Text
name <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'<',Char
'>',Char
'/') forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
(Map Text Text
as, Bool
close) <- Parser (Map Text Text, Bool)
attrs
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Map Text Text -> Bool -> Token
TagOpen Text
name (forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
as) Bool
close
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = Text -> Token
Incomplete forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Text
T.cons Char
'<' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText
text :: Parser Token
text :: Parser Token
text = Text -> Token
Text forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser Text -> Parser Text
atLeast Int
1 ((Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'<'))
decodeEntity :: MonadThrow m => Text -> m Text
decodeEntity :: forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity Text
entity =
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit
forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [Text
"&",Text
entity,Text
";"]
#if MIN_VERSION_xml_conduit(1,9,0)
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text Event m ()
XML.parseText forall a. Default a => a
XML.def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
#else
.| XML.parseText' XML.def { XML.psDecodeEntities = XML.decodeHtmlEntities }
#endif
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content
token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd Token
open =
Builder -> Parser [Token]
loop forall a. Monoid a => a
mempty
where
loop :: Builder -> Parser [Token]
loop Builder
acc = do
Text
chunk <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
== Char
'<')
let acc' :: Builder
acc' = Builder
acc forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
chunk
finish :: Parser [Token]
finish = forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
open, Text -> Token
Text forall a b. (a -> b) -> a -> b
$ Text -> Text
L.toStrict forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
acc', Text -> Token
TagClose Text
"script"]
hasContent :: Parser [Token]
hasContent = (Text -> Parser Text
string Text
"/script>" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
finish) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser [Token]
loop (Builder
acc' forall a. Semigroup a => a -> a -> a
<> Builder
"<")
(Char -> Parser Char
char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser [Token]
hasContent) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser [Token]
finish
tokens :: Parser [Token]
tokens :: Parser [Token]
tokens = do
Token
t <- Parser Token
token
case Token
t of
TagOpen Text
"script" Map Text Text
_ Bool
False -> Token -> Parser [Token]
tillScriptEnd Token
t
Text Text
text0 -> do
let parseText :: Parser Text
parseText = do
Text Text
text <- Parser Token
token
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
text
[Text]
texts <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Text
parseText
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text -> Token
Text forall a b. (a -> b) -> a -> b
$ Text -> Text
decodeString forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ Text
text0 forall a. a -> [a] -> [a]
: [Text]
texts]
Token
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
t]
atLeast :: Int -> Parser Text -> Parser Text
atLeast :: Int -> Parser Text -> Parser Text
atLeast Int
0 Parser Text
p = Parser Text
p
atLeast Int
n Parser Text
p = Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser Text -> Parser Text
atLeast (Int
nforall a. Num a => a -> a -> a
-Int
1) Parser Text
p
cond :: a -> a -> Bool -> a
cond :: forall a. a -> a -> Bool -> a
cond a
a1 a
a2 Bool
b = if Bool
b then a
a1 else a
a2
(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
(||.) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)
in2 :: Eq a => (a,a) -> a -> Bool
in2 :: forall a. Eq a => (a, a) -> a -> Bool
in2 (a
a1,a
a2) a
a = a
aforall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a2
in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: forall a. Eq a => (a, a, a) -> a -> Bool
in3 (a
a1,a
a2,a
a3) a
a = a
aforall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aforall a. Eq a => a -> a -> Bool
==a
a3
boolP :: Parser a -> Parser Bool
boolP :: forall a. Parser a -> Parser Bool
boolP Parser a
p = Parser a
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
maybeP :: Parser a -> Parser (Maybe a)
maybeP :: forall a. Parser a -> Parser (Maybe a)
maybeP Parser a
p = forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
tokenStream :: Monad m
=> ConduitT Text Token m ()
tokenStream :: forall (m :: * -> *). Monad m => ConduitT Text Token m ()
tokenStream =
forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
CL.filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
(Monad m, AttoparsecInput a) =>
Parser a b
-> ConduitT a (Either ParseError (PositionRange, b)) m ()
CA.conduitParserEither Parser [Token]
tokens forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
Monad m =>
(a -> [b]) -> ConduitT a b m ()
CL.concatMap forall {a} {a} {a}. Show a => Either a (a, a) -> a
go
where
go :: Either a (a, a) -> a
go (Left a
e) = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"html-conduit: parse error that should never happen occurred! " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show a
e
go (Right (a
_, a
tokens')) = a
tokens'
splitAccum :: [Token] -> (Text, [Token])
splitAccum :: [Token] -> (Text, [Token])
splitAccum [] = (forall a. Monoid a => a
mempty, [])
splitAccum (forall a. [a] -> [a]
reverse -> (Incomplete Text
s : [Token]
xs)) = (Text
s, forall a. [a] -> [a]
reverse [Token]
xs)
splitAccum [Token]
tokens = (forall a. Monoid a => a
mempty, [Token]
tokens)
decodeEntities :: Monad m => ConduitT Token Token m ()
decodeEntities :: forall (m :: * -> *). Monad m => ConduitT Token Token m ()
decodeEntities =
ConduitT Token Token m ()
start
where
start :: ConduitT Token Token m ()
start = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\Token
token' -> forall {m :: * -> *}. Monad m => Token -> ConduitT Token Token m ()
start' Token
token' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Token m ()
start)
start' :: Token -> ConduitT Token Token m ()
start' (Text Text
t) = (forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *). Monad m => ConduitT Token Text m ()
yieldWhileText) forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *). Monad m => ConduitT Text Text m ()
decodeEntities' forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) a b.
Monad m =>
(a -> Maybe b) -> ConduitT a b m ()
CL.mapMaybe Text -> Maybe Token
go
start' (TagOpen Text
name Map Text Text
attrs' Bool
bool) = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Text -> Map Text Text -> Bool -> Token
TagOpen Text
name (forall a b k. (a -> b) -> Map k a -> Map k b
Map.map Text -> Text
decodeString Map Text Text
attrs') Bool
bool)
start' Token
token' = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Token
token'
go :: Text -> Maybe Token
go Text
t
| Text
t forall a. Eq a => a -> a -> Bool
== Text
"" = forall a. Maybe a
Nothing
| Bool
otherwise = forall a. a -> Maybe a
Just (Text -> Token
Text Text
t)
decodeString :: Text -> Text
decodeString :: Text -> Text
decodeString Text
input =
case Text -> (Text, Text)
makeEntityDecoder Text
input of
(Text
value', Text
remainder)
| Text
value' forall a. Eq a => a -> a -> Bool
/= forall a. Monoid a => a
mempty -> Text
value' forall a. Semigroup a => a -> a -> a
<> Text -> Text
decodeString Text
remainder
| Bool
otherwise -> Text
input
decodeEntities' :: Monad m => ConduitT Text Text m ()
decodeEntities' :: forall (m :: * -> *). Monad m => ConduitT Text Text m ()
decodeEntities' =
forall {m :: * -> *}.
Monad m =>
(Text -> Text) -> ConduitT Text Text m ()
loop forall a. a -> a
id
where
loop :: (Text -> Text) -> ConduitT Text Text m ()
loop Text -> Text
accum = do
Maybe Text
mchunk <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
let chunk :: Text
chunk = Text -> Text
accum forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe forall a. Monoid a => a
mempty Maybe Text
mchunk
(Text
newStr, Text
remainder) = Text -> (Text, Text)
makeEntityDecoder Text
chunk
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
newStr
if forall a. Maybe a -> Bool
isJust Maybe Text
mchunk
then (Text -> Text) -> ConduitT Text Text m ()
loop (forall a. Monoid a => a -> a -> a
mappend Text
remainder)
else forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
remainder
yieldWhileText :: Monad m => ConduitT Token Text m ()
yieldWhileText :: forall (m :: * -> *). Monad m => ConduitT Token Text m ()
yieldWhileText =
ConduitT Token Text m ()
loop
where
loop :: ConduitT Token Text m ()
loop = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) Token -> ConduitT Token Text m ()
go
go :: Token -> ConduitT Token Text m ()
go (Text Text
t) = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Token Text m ()
loop
go Token
token' = forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Token
token'
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder :: Text -> (Text, Text)
makeEntityDecoder = forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Text -> Text
L.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
B.toLazyText) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> (Builder, Text)
go
where
go :: Text -> (Builder, Text)
go Text
s =
case (Char -> Bool) -> Text -> (Text, Text)
T.break (forall a. Eq a => a -> a -> Bool
==Char
'&') Text
s of
(Text
_,Text
"") -> (Text -> Builder
B.fromText Text
s, Text
"")
(Text
before,restPlusAmp :: Text
restPlusAmp@(Int -> Text -> Text
T.drop Int
1 -> Text
rest)) ->
case (Char -> Bool) -> Text -> (Text, Text)
T.break (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char -> Bool
isNameChar Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'#')) Text
rest of
(Text
_,Text
"") -> (Text -> Builder
B.fromText Text
before, Text
restPlusAmp)
(Text
entity,Text
after) -> (Builder
before1 forall a. Semigroup a => a -> a -> a
<> Builder
before2, Text
after')
where
before1 :: Builder
before1 = Text -> Builder
B.fromText Text
before
(Builder
before2, Text
after') =
case Maybe Text
mdecoded of
Maybe Text
Nothing -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((Builder
"&" forall a. Semigroup a => a -> a -> a
<> Text -> Builder
B.fromText Text
entity) forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
Just (Text -> Builder
B.fromText -> Builder
decoded) ->
case Text -> Maybe (Char, Text)
T.uncons Text
after of
Just (Char
';',Text
validAfter) -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
validAfter)
Just (Char
_invalid,Text
_rest) -> forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Builder
decoded forall a. Semigroup a => a -> a -> a
<>) (Text -> (Builder, Text)
go Text
after)
Maybe (Char, Text)
Nothing -> (forall a. Monoid a => a
mempty, Text
s)
mdecoded :: Maybe Text
mdecoded =
if Text
entity forall a. Eq a => a -> a -> Bool
== forall a. Monoid a => a
mempty
then forall a. Maybe a
Nothing
else forall (m :: * -> *). MonadThrow m => Text -> m Text
decodeEntity Text
entity
isNameStart :: Char -> Bool
isNameStart :: Char -> Bool
isNameStart Char
c =
Char
c forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
||
Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
||
Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
||
Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD6') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xF6') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x37D') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x200D') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x218F') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c =
Char
c forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
||
Char
c forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
||
Char
c forall a. Eq a => a -> a -> Bool
== Char
'\xB7' Bool -> Bool -> Bool
||
Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
||
Char -> Bool
isNameStart Char
c Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x0300' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x036F') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')