-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.DocumentInput
   Copyright  : Copyright (C) 2005 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   State arrows for document input
-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.DocumentInput
    ( getXmlContents
    , getXmlEntityContents
    , getEncoding
    , getTextEncoding
    , decodeDocument
    , addInputError
    )
where

import           Control.Arrow
import           Control.Arrow.ArrowIf
import           Control.Arrow.ArrowIO
import           Control.Arrow.ArrowList
import           Control.Arrow.ArrowTree
import           Control.Arrow.ListArrow

import           Data.List                            (isPrefixOf)
import           Data.String.Unicode                  (getDecodingFct,
                                                       guessEncoding,
                                                       normalizeNL)

import           System.FilePath                      (takeExtension)

import qualified Text.XML.HXT.IO.GetFILE              as FILE

import           Text.XML.HXT.DOM.Interface

import           Text.XML.HXT.Arrow.ParserInterface   (parseXmlDocEncodingSpec, parseXmlEntityEncodingSpec,
                                                       removeEncodingSpec)
import           Text.XML.HXT.Arrow.XmlArrow
import           Text.XML.HXT.Arrow.XmlState
import           Text.XML.HXT.Arrow.XmlState.TypeDefs

-- ----------------------------------------------------------

protocolHandlers        :: AssocList String (IOStateArrow s XmlTree XmlTree)
protocolHandlers :: forall s. AssocList String (IOStateArrow s XmlTree XmlTree)
protocolHandlers
    = [ (String
"file",        forall s. IOStateArrow s XmlTree XmlTree
getFileContents)
      , (String
"http",        forall s. IOStateArrow s XmlTree XmlTree
getHttpContents)
      , (String
"https",       forall s. IOStateArrow s XmlTree XmlTree
getHttpContents)
      , (String
"stdin",       forall s. IOStateArrow s XmlTree XmlTree
getStdinContents)
      ]

getProtocolHandler      :: IOStateArrow s String (IOStateArrow s XmlTree XmlTree)
getProtocolHandler :: forall s. IOStateArrow s String (IOStateArrow s XmlTree XmlTree)
getProtocolHandler
    = forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr (\ String
s -> forall k v. Eq k => v -> k -> AssocList k v -> v
lookupDef forall s. IOStateArrow s XmlTree XmlTree
getUnsupported String
s forall s. AssocList String (IOStateArrow s XmlTree XmlTree)
protocolHandlers)

getUnsupported          :: IOStateArrow s XmlTree XmlTree
getUnsupported :: forall s. IOStateArrow s XmlTree XmlTree
getUnsupported
    = forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr ((String
"unsupported protocol in URI " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show)
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA (forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr forall s b. String -> IOStateArrow s b b
issueFatal)
              )
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"accessing documents"

getStringContents               :: IOStateArrow s XmlTree XmlTree
getStringContents :: forall s. IOStateArrow s XmlTree XmlTree
getStringContents
    = forall {cat :: * -> * -> *}.
ArrowXml cat =>
String -> cat XmlTree XmlTree
setCont forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferMessage String
"OK"
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferStatus String
"200"
    where
    setCont :: String -> cat XmlTree XmlTree
setCont String
contents
        = forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren (forall (a :: * -> * -> *) n. ArrowXml a => String -> a n XmlTree
txt String
contents')
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferURI (forall a. Int -> [a] -> [a]
take Int
7 String
contents)                 -- the "string:" prefix is stored, this is required by setBaseURIFromDoc
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
a_source (forall a. Show a => a -> String
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String
prefix Int
48 forall a b. (a -> b) -> a -> b
$ String
contents')       -- a quoted prefix of the content, max 48 chars is taken as source name
        where
        contents' :: String
contents'  = forall a. Int -> [a] -> [a]
drop (forall (t :: * -> *) a. Foldable t => t a -> Int
length String
stringProtocol) String
contents
        prefix :: Int -> String -> String
prefix Int
l String
s
            | forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s' forall a. Ord a => a -> a -> Bool
> Int
l = forall a. Int -> [a] -> [a]
take (Int
l forall a. Num a => a -> a -> a
- Int
3) String
s' forall a. [a] -> [a] -> [a]
++ String
"..."
            | Bool
otherwise     = String
s'
            where
            s' :: String
s' = forall a. Int -> [a] -> [a]
take (Int
l forall a. Num a => a -> a -> a
+ Int
1) String
s

getFileContents         :: IOStateArrow s XmlTree XmlTree
getFileContents :: forall s. IOStateArrow s XmlTree XmlTree
getFileContents
    = forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA ( ( forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState Bool
theStrictInput
                 forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
                 ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferURI
                   forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                   forall (a :: * -> * -> *). ArrowList a => a String String
getPathFromURI
                 )
               )
               forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
               forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
2 (\ (Bool
b, String
f) -> String
"read file " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
f forall a. [a] -> [a] -> [a]
++ String
" (strict input = " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Bool
b forall a. [a] -> [a] -> [a]
++ String
")")
               forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
               forall (a :: * -> * -> *) b c. ArrowIO a => (b -> IO c) -> a b c
arrIO (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Bool
-> String -> IO (Either ([(String, String)], String) ByteString)
FILE.getCont)
               forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
               ( forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall s.
[(String, String)] -> String -> IOStateArrow s XmlTree XmlTree
addInputError) -- io error occured
                 forall (a :: * -> * -> *) b d c.
ArrowChoice a =>
a b d -> a c d -> a (Either b c) d
|||
                 forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr forall s. ByteString -> IOStateArrow s XmlTree XmlTree
addTxtContent      -- content read
               )
             )
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. IOStateArrow s XmlTree XmlTree
addMimeType

getStdinContents                :: IOStateArrow s XmlTree XmlTree
getStdinContents :: forall s. IOStateArrow s XmlTree XmlTree
getStdinContents
    = forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA (  forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState Bool
theStrictInput
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                forall (a :: * -> * -> *) b c. ArrowIO a => (b -> IO c) -> a b c
arrIO Bool -> IO (Either ([(String, String)], String) ByteString)
FILE.getStdinCont
               forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
               ( forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall s.
[(String, String)] -> String -> IOStateArrow s XmlTree XmlTree
addInputError) -- io error occured
                 forall (a :: * -> * -> *) b d c.
ArrowChoice a =>
a b d -> a c d -> a (Either b c) d
|||
                 forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr forall s. ByteString -> IOStateArrow s XmlTree XmlTree
addTxtContent           -- content read
               )
             )

addInputError                :: Attributes -> String -> IOStateArrow s XmlTree XmlTree
addInputError :: forall s.
[(String, String)] -> String -> IOStateArrow s XmlTree XmlTree
addInputError [(String, String)]
al String
e
    = forall s b. String -> IOStateArrow s b b
issueFatal String
e
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *) b. ArrowList a => [a b b] -> a b b
seqA (forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr) [(String, String)]
al)
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"accessing documents"

addMimeType     :: IOStateArrow s XmlTree XmlTree
addMimeType :: forall s. IOStateArrow s XmlTree XmlTree
addMimeType
    = forall {cat :: * -> * -> *}.
ArrowXml cat =>
String -> cat XmlTree XmlTree
addMime forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< ( ( forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState String
theFileMimeType
                     forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                     forall (a :: * -> * -> *) b. ArrowList a => (b -> Bool) -> a b b
isA (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null)
                   )
                   forall (a :: * -> * -> *) b c. ArrowIf a => a b c -> a b c -> a b c
`orElse`
                   ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferURI
                     forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                     ( forall {a :: * -> * -> *}.
Arrow a =>
MimeTypeTable -> a String String
uriToMime forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall s b. IOStateArrow s b MimeTypeTable
getMimeTypeTable )
                   )
                 )
    where
    addMime :: String -> a XmlTree XmlTree
addMime String
mt
        = forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferMimeType String
mt
    uriToMime :: MimeTypeTable -> a String String
uriToMime MimeTypeTable
mtt
        = forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr forall a b. (a -> b) -> a -> b
$ ( \ String
uri -> String -> MimeTypeTable -> String
extensionToMimeType (forall a. Int -> [a] -> [a]
drop Int
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
takeExtension forall a b. (a -> b) -> a -> b
$ String
uri) MimeTypeTable
mtt )

addTxtContent   :: Blob -> IOStateArrow s XmlTree XmlTree
addTxtContent :: forall s. ByteString -> IOStateArrow s XmlTree XmlTree
addTxtContent ByteString
bc
    = forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren (forall (a :: * -> * -> *) n.
ArrowXml a =>
ByteString -> a n XmlTree
blb ByteString
bc)
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferMessage String
"OK"
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferStatus String
"200"

getHttpContents         :: IOStateArrow s XmlTree XmlTree
getHttpContents :: forall s. IOStateArrow s XmlTree XmlTree
getHttpContents
    = forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState forall a b. (a -> b) -> a -> b
$ forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA forall a b. (a -> b) -> a -> b
$ forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSLA (XIOState ()) XmlTree XmlTree)
theHttpHandler

getContentsFromString   :: IOStateArrow s XmlTree XmlTree
getContentsFromString :: forall s. IOStateArrow s XmlTree XmlTree
getContentsFromString
    = ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
        forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        forall (a :: * -> * -> *) b. ArrowList a => (b -> Bool) -> a b b
isA (forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf String
stringProtocol)
      )
      forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d
`guards`
      forall s. IOStateArrow s XmlTree XmlTree
getStringContents

getContentsFromDoc      :: IOStateArrow s XmlTree XmlTree
getContentsFromDoc :: forall s. IOStateArrow s XmlTree XmlTree
getContentsFromDoc
    = ( ( forall {cat :: * -> * -> *}.
ArrowXml cat =>
String -> cat XmlTree XmlTree
addTransferURI forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall s b. IOStateArrow s b String
getBaseURI
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall s. IOStateArrow s XmlTree XmlTree
getCont
        )
        forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
        ( forall s b. String -> IOStateArrow s b b
setAbsURI forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source
                         forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> (c -> d) -> a b d
>>^
                         ( \ String
src-> (if forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
src then String
"stdin:" else String
src) )   -- empty document name -> read from stdin
                       )
        )
      )
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"getContentsFromDoc"
    where
    setAbsURI :: String -> IOSLA (XIOState s) d d
setAbsURI String
src
        = forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d -> a b d
ifA ( forall (a :: * -> * -> *) c b. ArrowList a => c -> a b c
constA String
src forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall s. IOStateArrow s String String
changeBaseURI )
          forall (a :: * -> * -> *) b. ArrowList a => a b b
this
          ( forall s b. String -> IOStateArrow s b b
issueFatal (String
"illegal URI : " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
src) )

    addTransferURI :: String -> a XmlTree XmlTree
addTransferURI String
uri
        = forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferURI String
uri

    getCont :: IOSLA (XIOState s) XmlTree XmlTree
getCont
        = forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA ( forall s b. IOStateArrow s b String
getBaseURI                           -- compute the handler and call it
                   forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                   forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
2 ((String
"getContentsFromDoc: reading " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show)
                   forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                   forall (a :: * -> * -> *). ArrowList a => a String String
getSchemeFromURI
                   forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                   forall s. IOStateArrow s String (IOStateArrow s XmlTree XmlTree)
getProtocolHandler
                 )
          forall (a :: * -> * -> *) b c. ArrowIf a => a b c -> a b c -> a b c
`orElse`
          forall (a :: * -> * -> *) b. ArrowList a => a b b
this                                          -- don't change tree, when no handler can be found

setBaseURIFromDoc       :: IOStateArrow s XmlTree XmlTree
setBaseURIFromDoc :: forall s. IOStateArrow s XmlTree XmlTree
setBaseURIFromDoc
    = forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferURI
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                forall (a :: * -> * -> *) b. ArrowList a => (b -> Bool) -> a b b
isA (forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf String
stringProtocol)         -- do not change base URI when reading from a string
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                forall s. IOStateArrow s String String
setBaseURI
              )

{- |
   Read the content of a document.

   This routine is usually called from 'Text.XML.HXT.Arrow.ProcessDocument.getDocumentContents'.

   The input must be a root node (constructed with 'Text.XML.HXT.Arrow.XmlArrow.root'), usually without children.
   The attribute list contains all input parameters, e.g. URI or source file name, encoding preferences, ...
   If the source name is empty, the input is read from standard input.

   The source is transformed into an absolute URI. If the source is a relative URI, or a file name,
   it is expanded into an absolute URI with respect to the current base URI.
   The default base URI is of protocol \"file\" and points to the current working directory.

   The currently supported protocols are \"http\", \"file\", \"stdin\" and \"string\".

   The latter two are internal protocols. An uri of the form \"stdin:\" stands for the content of
   the standard input stream.

   \"string:some text\" means, that \"some text\" is taken as input.
   This internal protocol is used for reading from normal 'String' values.

-}

getXmlContents          :: IOStateArrow s XmlTree XmlTree
getXmlContents :: forall s. IOStateArrow s XmlTree XmlTree
getXmlContents
    = forall s.
IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
getXmlContents' forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
parseXmlDocEncodingSpec
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. IOStateArrow s XmlTree XmlTree
setBaseURIFromDoc

getXmlEntityContents            :: IOStateArrow s XmlTree XmlTree
getXmlEntityContents :: forall s. IOStateArrow s XmlTree XmlTree
getXmlEntityContents
    = forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"getXmlEntityContents"
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferMimeType String
text_xml_external_parsed_entity  -- the default transfer mimetype
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s.
IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
getXmlContents' forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
parseXmlEntityEncodingSpec
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferMimeType String
text_xml_external_parsed_entity
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
processChildren
      ( forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
removeEncodingSpec
        forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
        forall (a :: * -> * -> *).
ArrowXml a =>
(String -> String) -> a XmlTree XmlTree
changeText String -> String
normalizeNL                  -- newline normalization must be done here
      )                                         -- the following calls of the parsers don't do this
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s. IOStateArrow s XmlTree XmlTree
setBaseURIFromDoc
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 String
"getXmlEntityContents done"

getXmlContents'         :: IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
getXmlContents' :: forall s.
IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree
getXmlContents' IOStateArrow s XmlTree XmlTree
parseEncodingSpec
    = ( forall s. IOStateArrow s XmlTree XmlTree
getContentsFromString    -- no decoding done for string: protocol
        forall (a :: * -> * -> *) b c. ArrowIf a => a b c -> a b c -> a b c
`orElse`
        ( forall s. IOStateArrow s XmlTree XmlTree
getContentsFromDoc
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall (a :: * -> * -> *) b c d.
ArrowIf a =>
[IfThen (a b c) (a b d)] -> a b d
choiceA
          [ forall s. IOStateArrow s XmlTree XmlTree
isXmlHtmlDoc  forall a b. a -> b -> IfThen a b
:-> ( IOStateArrow s XmlTree XmlTree
parseEncodingSpec
                                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                forall s. IOStateArrow s XmlTree XmlTree
filterErrorMsg
                                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                forall s. IOStateArrow s XmlTree XmlTree
decodeDocument
                              )
          , forall s. IOStateArrow s XmlTree XmlTree
isTextDoc     forall a b. a -> b -> IfThen a b
:-> forall s. IOStateArrow s XmlTree XmlTree
decodeDocument
          , forall (a :: * -> * -> *) b. ArrowList a => a b b
this          forall a b. a -> b -> IfThen a b
:-> forall (a :: * -> * -> *) b. ArrowList a => a b b
this
          ]
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall (a :: * -> * -> *) b c. ArrowList a => a b c -> a b b
perform ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferURI
                    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                    forall b s. Int -> (b -> String) -> IOStateArrow s b b
traceValue Int
1 ((String
"getXmlContents: content read and decoded for " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show)
                  )
          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          forall s. String -> IOStateArrow s XmlTree XmlTree
traceDoc String
"getXmlContents'"
        )
      )
      forall (a :: * -> * -> *) b c. ArrowIf a => a b b -> a b c -> a b b
`when`
      forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isRoot

isMimeDoc               :: (String -> Bool) -> IOStateArrow s XmlTree XmlTree
isMimeDoc :: forall s. (String -> Bool) -> IOStateArrow s XmlTree XmlTree
isMimeDoc String -> Bool
isMT          = forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA forall a b. (a -> b) -> a -> b
$
                          ( ( forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferMimeType forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> (c -> d) -> a b d
>>^ String -> String
stringToLower )
                            forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                            forall (a :: * -> * -> *) b. ArrowList a => (b -> Bool) -> a b b
isA (\ String
t -> forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
t Bool -> Bool -> Bool
|| String -> Bool
isMT String
t)
                          )
                          forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d
`guards` forall (a :: * -> * -> *) b. ArrowList a => a b b
this

isTextDoc, isXmlHtmlDoc :: IOStateArrow s XmlTree XmlTree

isTextDoc :: forall s. IOStateArrow s XmlTree XmlTree
isTextDoc               = forall s. (String -> Bool) -> IOStateArrow s XmlTree XmlTree
isMimeDoc String -> Bool
isTextMimeType

isXmlHtmlDoc :: forall s. IOStateArrow s XmlTree XmlTree
isXmlHtmlDoc            = forall s. (String -> Bool) -> IOStateArrow s XmlTree XmlTree
isMimeDoc (\ String
mt -> String -> Bool
isHtmlMimeType String
mt Bool -> Bool -> Bool
|| String -> Bool
isXmlMimeType String
mt)

-- ------------------------------------------------------------

getEncoding     :: IOStateArrow s XmlTree String
getEncoding :: forall s. IOStateArrow s XmlTree String
getEncoding
    = forall (a :: * -> * -> *) b c. ArrowList a => [a b c] -> a b c
catA [ forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren                  -- 1. guess: guess encoding by looking at the first few bytes
             forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
             forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr String -> String
guessEncoding
           , forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferEncoding      -- 2. guess: take the transfer encoding
           , forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_encoding            -- 3. guess: take encoding parameter in root node
           , forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar  Selector XIOSysState String
theInputEncoding        -- 4. guess: take encoding parameter in global state
           , forall (a :: * -> * -> *) c b. ArrowList a => c -> a b c
constA String
utf8                        -- default : utf8
           ]
      forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> d) -> a b d
>. (forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null))           -- make the filter deterministic: take 1. entry from list of guesses

getTextEncoding :: IOStateArrow s XmlTree String
getTextEncoding :: forall s. IOStateArrow s XmlTree String
getTextEncoding
    = forall (a :: * -> * -> *) b c. ArrowList a => [a b c] -> a b c
catA [ forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferEncoding      -- 1. guess: take the transfer encoding
           , forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_encoding            -- 2. guess: take encoding parameter in root node
           , forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState String
theInputEncoding         -- 3. guess: take encoding parameter in global state
           , forall (a :: * -> * -> *) c b. ArrowList a => c -> a b c
constA String
isoLatin1                   -- default : no encoding
           ]
      forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> d) -> a b d
>. (forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null))           -- make the filter deterministic: take 1. entry from list of guesses


decodeDocument  :: IOStateArrow s XmlTree XmlTree
decodeDocument :: forall s. IOStateArrow s XmlTree XmlTree
decodeDocument
    = forall (a :: * -> * -> *) b c d.
ArrowIf a =>
[IfThen (a b c) (a b d)] -> a b d
choiceA
      [ ( forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isRoot forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall s. IOStateArrow s XmlTree XmlTree
isXmlHtmlDoc )   forall a b. a -> b -> IfThen a b
:-> ( forall s. Bool -> IOStateArrow s XmlTree XmlTree
decodeX   forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState Bool
theExpat)
      , ( forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isRoot forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall s. IOStateArrow s XmlTree XmlTree
isTextDoc )      forall a b. a -> b -> IfThen a b
:-> ( forall s. String -> IOStateArrow s XmlTree XmlTree
decodeArr forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall s. IOStateArrow s XmlTree String
getTextEncoding )
      , forall (a :: * -> * -> *) b. ArrowList a => a b b
this                          forall a b. a -> b -> IfThen a b
:-> forall (a :: * -> * -> *) b. ArrowList a => a b b
this
      ]
    where
    decodeX             :: Bool -> IOStateArrow s XmlTree XmlTree
    decodeX :: forall s. Bool -> IOStateArrow s XmlTree XmlTree
decodeX Bool
False       = forall s. String -> IOStateArrow s XmlTree XmlTree
decodeArr forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall s. IOStateArrow s XmlTree String
getEncoding
    decodeX Bool
True        = forall s. String -> IOStateArrow s XmlTree XmlTree
noDecode  forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall s. IOStateArrow s XmlTree String
getEncoding         -- parse with expat

    noDecode :: String -> IOSLA (XIOState s) XmlTree XmlTree
noDecode String
enc        = forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 (String
"no decoding (done by expat): encoding is " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
enc)
                          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferEncoding String
enc

    decodeArr   :: String -> IOStateArrow s XmlTree XmlTree
    decodeArr :: forall s. String -> IOStateArrow s XmlTree XmlTree
decodeArr String
enc
        = forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall s. IOStateArrow s XmlTree XmlTree
notFound forall {s}.
(String -> (String, [String]))
-> IOSLA (XIOState s) XmlTree XmlTree
found forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe (String -> (String, [String]))
getDecodingFct forall a b. (a -> b) -> a -> b
$ String
enc
        where
        found :: (String -> (String, [String]))
-> IOSLA (XIOState s) XmlTree XmlTree
found String -> (String, [String])
df
            = forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 (String
"decodeDocument: encoding is " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
enc)
              forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
              ( forall {s}.
(String -> (String, [String]))
-> Bool -> IOSLA (XIOState s) XmlTree XmlTree
decodeText String -> (String, [String])
df forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState Bool
theEncodingErrors )
              forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
              forall (a :: * -> * -> *).
ArrowXml a =>
String -> String -> a XmlTree XmlTree
addAttr String
transferEncoding String
enc

        notFound :: IOSLA (XIOState s) XmlTree XmlTree
notFound
            = forall s b. String -> IOStateArrow s b b
issueFatal (String
"encoding scheme not supported: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
enc)
              forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
              forall s. String -> IOStateArrow s XmlTree XmlTree
setDocumentStatusFromSystemState String
"decoding document"

{- just for performance test
        decodeText _ _ = this
-}
        decodeText :: (String -> (String, [String]))
-> Bool -> IOSLA (XIOState s) XmlTree XmlTree
decodeText String -> (String, [String])
df Bool
withEncErrors
            = forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
processChildren
              ( forall (a :: * -> * -> *). ArrowXml a => a XmlTree String
getText                                                 -- get the document content
                -- the following 3 lines
                -- don't seem to raise the space problem in decodeText
                -- space is allocated in blobToString and in parsec
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr String -> (String, [String])
df                                              -- decode the text, result is (string, [errMsg])
                forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> ( ( forall a b. (a, b) -> a
fst forall (a :: * -> * -> *) b c d.
Arrow a =>
(b -> c) -> a c d -> a b d
^>> forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
mkText )                                -- take decoded string and build text node
                      forall (a :: * -> * -> *) b c.
ArrowPlus a =>
a b c -> a b c -> a b c
<+>
                      ( if Bool
withEncErrors
                        then
                        ( forall (a :: * -> * -> *) b c. ArrowList a => (b -> [c]) -> a b c
arrL forall a b. (a, b) -> b
snd                                      -- take the error messages
                          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr ((String
enc forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
" encoding error" forall a. [a] -> [a] -> [a]
++))       -- prefix with enc error
                          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA (forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr forall s b. String -> IOStateArrow s b b
issueErr)                         -- build issueErr arrow and apply
                          forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                          forall (a :: * -> * -> *) b c. ArrowList a => a b c
none                                          -- neccessary for type match with <+>
                        )
                        else forall (a :: * -> * -> *) b c. ArrowList a => a b c
none
                      )
                    )
              )

-- ------------------------------------------------------------