{-# LANGUAGE TypeSynonymInstances #-}
module Text.XML.Light.Lexer where
import Text.XML.Light.Types
import Data.Char (chr,isSpace)
import Numeric (readHex)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as TS
import qualified Data.Text.Lazy as TL
class XmlSource s where
uncons :: s -> Maybe (Char,s)
instance XmlSource String where
uncons :: String -> Maybe (Char, String)
uncons (Char
c:String
s) = forall a. a -> Maybe a
Just (Char
c,String
s)
uncons String
"" = forall a. Maybe a
Nothing
instance XmlSource S.ByteString where
uncons :: ByteString -> Maybe (Char, ByteString)
uncons ByteString
bs = forall {a} {b}. Enum a => (a, b) -> (Char, b)
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> Maybe (Word8, ByteString)
S.uncons ByteString
bs
where f :: (a, b) -> (Char, b)
f (a
c,b
s) = (Int -> Char
chr (forall a. Enum a => a -> Int
fromEnum a
c), b
s)
instance XmlSource L.ByteString where
uncons :: ByteString -> Maybe (Char, ByteString)
uncons ByteString
bs = forall {a} {b}. Enum a => (a, b) -> (Char, b)
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ByteString -> Maybe (Word8, ByteString)
L.uncons ByteString
bs
where f :: (a, b) -> (Char, b)
f (a
c,b
s) = (Int -> Char
chr (forall a. Enum a => a -> Int
fromEnum a
c), b
s)
instance XmlSource TS.Text where
uncons :: Text -> Maybe (Char, Text)
uncons = Text -> Maybe (Char, Text)
TS.uncons
instance XmlSource TL.Text where
uncons :: Text -> Maybe (Char, Text)
uncons = Text -> Maybe (Char, Text)
TL.uncons
linenumber :: XmlSource s => Integer -> s -> LString
linenumber :: forall s. XmlSource s => Integer -> s -> LString
linenumber Integer
n s
s = case forall s. XmlSource s => s -> Maybe (Char, s)
uncons s
s of
Maybe (Char, s)
Nothing -> []
Just (Char
'\r', s
s') -> case forall s. XmlSource s => s -> Maybe (Char, s)
uncons s
s' of
Just (Char
'\n',s
s'') -> forall {s}. XmlSource s => s -> LString
next s
s''
Maybe (Char, s)
_ -> forall {s}. XmlSource s => s -> LString
next s
s'
Just (Char
'\n', s
s') -> forall {s}. XmlSource s => s -> LString
next s
s'
Just (Char
c , s
s') -> (Integer
n,Char
c) forall a. a -> [a] -> [a]
: forall s. XmlSource s => Integer -> s -> LString
linenumber Integer
n s
s'
where
next :: s -> LString
next s
s' = Integer
n' seq :: forall a b. a -> b -> b
`seq` ((Integer
n,Char
'\n')forall a. a -> [a] -> [a]
:forall s. XmlSource s => Integer -> s -> LString
linenumber Integer
n' s
s') where n' :: Integer
n' = Integer
n forall a. Num a => a -> a -> a
+ Integer
1
data Scanner s = Scanner (Maybe (Char,s)) (s -> Maybe (Char,s))
customScanner :: (s -> Maybe (Char,s)) -> s -> Scanner s
customScanner :: forall s. (s -> Maybe (Char, s)) -> s -> Scanner s
customScanner s -> Maybe (Char, s)
next s
s = forall s. Maybe (Char, s) -> (s -> Maybe (Char, s)) -> Scanner s
Scanner (s -> Maybe (Char, s)
next s
s) s -> Maybe (Char, s)
next
instance XmlSource (Scanner s) where
uncons :: Scanner s -> Maybe (Char, Scanner s)
uncons (Scanner Maybe (Char, s)
this s -> Maybe (Char, s)
next) = do (Char
c,s
s1) <- Maybe (Char, s)
this
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
c, forall s. Maybe (Char, s) -> (s -> Maybe (Char, s)) -> Scanner s
Scanner (s -> Maybe (Char, s)
next s
s1) s -> Maybe (Char, s)
next)
type LChar = (Line,Char)
type LString = [LChar]
data Token = TokStart Line QName [Attr] Bool
| TokEnd Line QName
| TokCRef String
| TokText CData
deriving Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show
tokens :: XmlSource source => source -> [Token]
tokens :: forall source. XmlSource source => source -> [Token]
tokens = LString -> [Token]
tokens' forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. XmlSource s => Integer -> s -> LString
linenumber Integer
1
tokens' :: LString -> [Token]
tokens' :: LString -> [Token]
tokens' ((Integer
_,Char
'<') : c :: (Integer, Char)
c@(Integer
_,Char
'!') : LString
cs) = (Integer, Char) -> LString -> [Token]
special (Integer, Char)
c LString
cs
tokens' ((Integer
_,Char
'<') : LString
cs) = LString -> [Token]
tag (LString -> LString
dropSpace LString
cs)
tokens' [] = []
tokens' cs :: LString
cs@((Integer
l,Char
_):LString
_) = let (String
as,LString
bs) = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
breakn (Char
'<' forall a. Eq a => a -> a -> Bool
==) LString
cs
in forall a b. (a -> b) -> [a] -> [b]
map Txt -> Token
cvt (String -> [Txt]
decode_text String
as) forall a. [a] -> [a] -> [a]
++ LString -> [Token]
tokens' LString
bs
where cvt :: Txt -> Token
cvt (TxtBit String
x) = CData -> Token
TokText CData { cdLine :: Maybe Integer
cdLine = forall a. a -> Maybe a
Just Integer
l
, cdVerbatim :: CDataKind
cdVerbatim = CDataKind
CDataText
, cdData :: String
cdData = String
x
}
cvt (CRefBit String
x) = case String -> Maybe Char
cref_to_char String
x of
Just Char
c -> CData -> Token
TokText CData { cdLine :: Maybe Integer
cdLine = forall a. a -> Maybe a
Just Integer
l
, cdVerbatim :: CDataKind
cdVerbatim = CDataKind
CDataText
, cdData :: String
cdData = [Char
c]
}
Maybe Char
Nothing -> String -> Token
TokCRef String
x
special :: LChar -> LString -> [Token]
special :: (Integer, Char) -> LString -> [Token]
special (Integer, Char)
_ ((Integer
_,Char
'-') : (Integer
_,Char
'-') : LString
cs) = LString -> [Token]
skip LString
cs
where skip :: LString -> [Token]
skip ((Integer
_,Char
'-') : (Integer
_,Char
'-') : (Integer
_,Char
'>') : LString
ds) = LString -> [Token]
tokens' LString
ds
skip ((Integer, Char)
_ : LString
ds) = LString -> [Token]
skip LString
ds
skip [] = []
special (Integer, Char)
c ((Integer
_,Char
'[') : (Integer
_,Char
'C') : (Integer
_,Char
'D') : (Integer
_,Char
'A') : (Integer
_,Char
'T') : (Integer
_,Char
'A') : (Integer
_,Char
'[')
: LString
cs) =
let (String
xs,LString
ts) = forall {a}. [(a, Char)] -> (String, [(a, Char)])
cdata LString
cs
in CData -> Token
TokText CData { cdLine :: Maybe Integer
cdLine = forall a. a -> Maybe a
Just (forall a b. (a, b) -> a
fst (Integer, Char)
c), cdVerbatim :: CDataKind
cdVerbatim = CDataKind
CDataVerbatim, cdData :: String
cdData = String
xs }
forall a. a -> [a] -> [a]
: LString -> [Token]
tokens' LString
ts
where cdata :: [(a, Char)] -> (String, [(a, Char)])
cdata ((a
_,Char
']') : (a
_,Char
']') : (a
_,Char
'>') : [(a, Char)]
ds) = ([],[(a, Char)]
ds)
cdata ((a
_,Char
d) : [(a, Char)]
ds) = let (String
xs,[(a, Char)]
ys) = [(a, Char)] -> (String, [(a, Char)])
cdata [(a, Char)]
ds in (Char
dforall a. a -> [a] -> [a]
:String
xs,[(a, Char)]
ys)
cdata [] = ([],[])
special (Integer, Char)
c LString
cs =
let (String
xs,LString
ts) = forall {a}. String -> Int -> [(a, Char)] -> (String, [(a, Char)])
munch String
"" Int
0 LString
cs
in CData -> Token
TokText CData { cdLine :: Maybe Integer
cdLine = forall a. a -> Maybe a
Just (forall a b. (a, b) -> a
fst (Integer, Char)
c)
, cdVerbatim :: CDataKind
cdVerbatim = CDataKind
CDataRaw
, cdData :: String
cdData = Char
'<'forall a. a -> [a] -> [a]
:Char
'!'forall a. a -> [a] -> [a]
:(forall a. [a] -> [a]
reverse String
xs)
} forall a. a -> [a] -> [a]
: LString -> [Token]
tokens' LString
ts
where munch :: String -> Int -> [(a, Char)] -> (String, [(a, Char)])
munch String
acc Int
nesting ((a
_,Char
'>') : [(a, Char)]
ds)
| Int
nesting forall a. Eq a => a -> a -> Bool
== (Int
0::Int) = (Char
'>'forall a. a -> [a] -> [a]
:String
acc,[(a, Char)]
ds)
| Bool
otherwise = String -> Int -> [(a, Char)] -> (String, [(a, Char)])
munch (Char
'>'forall a. a -> [a] -> [a]
:String
acc) (Int
nestingforall a. Num a => a -> a -> a
-Int
1) [(a, Char)]
ds
munch String
acc Int
nesting ((a
_,Char
'<') : [(a, Char)]
ds)
= String -> Int -> [(a, Char)] -> (String, [(a, Char)])
munch (Char
'<'forall a. a -> [a] -> [a]
:String
acc) (Int
nestingforall a. Num a => a -> a -> a
+Int
1) [(a, Char)]
ds
munch String
acc Int
n ((a
_,Char
x) : [(a, Char)]
ds) = String -> Int -> [(a, Char)] -> (String, [(a, Char)])
munch (Char
xforall a. a -> [a] -> [a]
:String
acc) Int
n [(a, Char)]
ds
munch String
acc Int
_ [] = (String
acc,[])
qualName :: LString -> (QName,LString)
qualName :: LString -> (QName, LString)
qualName LString
xs = let (String
as,LString
bs) = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
breakn Char -> Bool
endName LString
xs
(Maybe String
q,String
n) = case forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char
':'forall a. Eq a => a -> a -> Bool
==) String
as of
(String
q1,Char
_:String
n1) -> (forall a. a -> Maybe a
Just String
q1, String
n1)
(String, String)
_ -> (forall a. Maybe a
Nothing, String
as)
in (QName { qURI :: Maybe String
qURI = forall a. Maybe a
Nothing, qPrefix :: Maybe String
qPrefix = Maybe String
q, qName :: String
qName = String
n }, LString
bs)
where endName :: Char -> Bool
endName Char
x = Char -> Bool
isSpace Char
x Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'=' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'>' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'/'
tag :: LString -> [Token]
tag :: LString -> [Token]
tag ((Integer
p,Char
'/') : LString
cs) = let (QName
n,LString
ds) = LString -> (QName, LString)
qualName (LString -> LString
dropSpace LString
cs)
in Integer -> QName -> Token
TokEnd Integer
p QName
n forall a. a -> [a] -> [a]
: case (LString -> LString
dropSpace LString
ds) of
(Integer
_,Char
'>') : LString
es -> LString -> [Token]
tokens' LString
es
LString
_ -> LString -> [Token]
tokens' LString
ds
tag [] = []
tag LString
cs = let (QName
n,LString
ds) = LString -> (QName, LString)
qualName LString
cs
([Attr]
as,Bool
b,[Token]
ts) = LString -> ([Attr], Bool, [Token])
attribs (LString -> LString
dropSpace LString
ds)
in Integer -> QName -> [Attr] -> Bool -> Token
TokStart (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head LString
cs)) QName
n [Attr]
as Bool
b forall a. a -> [a] -> [a]
: [Token]
ts
attribs :: LString -> ([Attr], Bool, [Token])
attribs :: LString -> ([Attr], Bool, [Token])
attribs LString
cs = case LString
cs of
(Integer
_,Char
'>') : LString
ds -> ([], Bool
False, LString -> [Token]
tokens' LString
ds)
(Integer
_,Char
'/') : LString
ds -> ([], Bool
True, case LString
ds of
(Integer
_,Char
'>') : LString
es -> LString -> [Token]
tokens' LString
es
LString
_ -> LString -> [Token]
tokens' LString
ds)
(Integer
_,Char
'?') : (Integer
_,Char
'>') : LString
ds -> ([], Bool
True, LString -> [Token]
tokens' LString
ds)
[] -> ([],Bool
False,[])
LString
_ -> let (Attr
a,LString
cs1) = LString -> (Attr, LString)
attrib LString
cs
([Attr]
as,Bool
b,[Token]
ts) = LString -> ([Attr], Bool, [Token])
attribs LString
cs1
in (Attr
aforall a. a -> [a] -> [a]
:[Attr]
as,Bool
b,[Token]
ts)
attrib :: LString -> (Attr,LString)
attrib :: LString -> (Attr, LString)
attrib LString
cs = let (QName
ks,LString
cs1) = LString -> (QName, LString)
qualName LString
cs
(String
vs,LString
cs2) = LString -> (String, LString)
attr_val (LString -> LString
dropSpace LString
cs1)
in ((QName -> String -> Attr
Attr QName
ks (ShowS
decode_attr String
vs)),LString -> LString
dropSpace LString
cs2)
attr_val :: LString -> (String,LString)
attr_val :: LString -> (String, LString)
attr_val ((Integer
_,Char
'=') : LString
cs) = LString -> (String, LString)
string (LString -> LString
dropSpace LString
cs)
attr_val LString
cs = (String
"",LString
cs)
dropSpace :: LString -> LString
dropSpace :: LString -> LString
dropSpace = forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Bool
isSpace forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd)
string :: LString -> (String,LString)
string :: LString -> (String, LString)
string ((Integer
_,Char
'"') : LString
cs) = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
break' (Char
'"' forall a. Eq a => a -> a -> Bool
==) LString
cs
string ((Integer
_,Char
'\'') : LString
cs) = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
break' (Char
'\'' forall a. Eq a => a -> a -> Bool
==) LString
cs
string LString
cs = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
breakn Char -> Bool
eos LString
cs
where eos :: Char -> Bool
eos Char
x = Char -> Bool
isSpace Char
x Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'>' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'/'
break' :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
break' :: forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
break' a -> Bool
p [(b, a)]
xs = let ([a]
as,[(b, a)]
bs) = forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
breakn a -> Bool
p [(b, a)]
xs
in ([a]
as, case [(b, a)]
bs of
[] -> []
(b, a)
_ : [(b, a)]
cs -> [(b, a)]
cs)
breakn :: (a -> Bool) -> [(b,a)] -> ([a],[(b,a)])
breakn :: forall a b. (a -> Bool) -> [(b, a)] -> ([a], [(b, a)])
breakn a -> Bool
p [(b, a)]
l = (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(b, a)]
as,[(b, a)]
bs) where ([(b, a)]
as,[(b, a)]
bs) = forall a. (a -> Bool) -> [a] -> ([a], [a])
break (a -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(b, a)]
l
decode_attr :: String -> String
decode_attr :: ShowS
decode_attr String
cs = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Txt -> String
cvt (String -> [Txt]
decode_text String
cs)
where cvt :: Txt -> String
cvt (TxtBit String
x) = String
x
cvt (CRefBit String
x) = case String -> Maybe Char
cref_to_char String
x of
Just Char
c -> [Char
c]
Maybe Char
Nothing -> Char
'&' forall a. a -> [a] -> [a]
: String
x forall a. [a] -> [a] -> [a]
++ String
";"
data Txt = TxtBit String | CRefBit String deriving Int -> Txt -> ShowS
[Txt] -> ShowS
Txt -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Txt] -> ShowS
$cshowList :: [Txt] -> ShowS
show :: Txt -> String
$cshow :: Txt -> String
showsPrec :: Int -> Txt -> ShowS
$cshowsPrec :: Int -> Txt -> ShowS
Show
decode_text :: [Char] -> [Txt]
decode_text :: String -> [Txt]
decode_text xs :: String
xs@(Char
'&' : String
cs) = case forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char
';' forall a. Eq a => a -> a -> Bool
==) String
cs of
(String
as,Char
_:String
bs) -> String -> Txt
CRefBit String
as forall a. a -> [a] -> [a]
: String -> [Txt]
decode_text String
bs
(String, String)
_ -> [String -> Txt
TxtBit String
xs]
decode_text [] = []
decode_text String
cs = let (String
as,String
bs) = forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char
'&' forall a. Eq a => a -> a -> Bool
==) String
cs
in String -> Txt
TxtBit String
as forall a. a -> [a] -> [a]
: String -> [Txt]
decode_text String
bs
cref_to_char :: [Char] -> Maybe Char
cref_to_char :: String -> Maybe Char
cref_to_char String
cs = case String
cs of
Char
'#' : String
ds -> String -> Maybe Char
num_esc String
ds
String
"lt" -> forall a. a -> Maybe a
Just Char
'<'
String
"gt" -> forall a. a -> Maybe a
Just Char
'>'
String
"amp" -> forall a. a -> Maybe a
Just Char
'&'
String
"apos" -> forall a. a -> Maybe a
Just Char
'\''
String
"quot" -> forall a. a -> Maybe a
Just Char
'"'
String
_ -> forall a. Maybe a
Nothing
num_esc :: String -> Maybe Char
num_esc :: String -> Maybe Char
num_esc String
cs = case String
cs of
Char
'x' : String
ds -> [(Int, String)] -> Maybe Char
check (forall a. (Eq a, Num a) => ReadS a
readHex String
ds)
String
_ -> [(Int, String)] -> Maybe Char
check (forall a. Read a => ReadS a
reads String
cs)
where check :: [(Int, String)] -> Maybe Char
check [(Int
n,String
"")] = Int -> Maybe Char
cvt_char Int
n
check [(Int, String)]
_ = forall a. Maybe a
Nothing
cvt_char :: Int -> Maybe Char
cvt_char :: Int -> Maybe Char
cvt_char Int
x
| forall a. Enum a => a -> Int
fromEnum (forall a. Bounded a => a
minBound :: Char) forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x forall a. Ord a => a -> a -> Bool
<= forall a. Enum a => a -> Int
fromEnum (forall a. Bounded a => a
maxBound::Char)
= forall a. a -> Maybe a
Just (forall a. Enum a => Int -> a
toEnum Int
x)
| Bool
otherwise = forall a. Maybe a
Nothing