module Language.Haskell.Lexer
  ( PosToken
  , Token(..)
  , lexerPass0
  , lexerPass0'
  , lexerPass1
  , rmSpace
  , layoutPre
  , module Language.Haskell.Lexer.Position
  ) where

import Language.Haskell.Lexer.Lex(haskellLex)
import Language.Haskell.Lexer.Utils
import Language.Haskell.Lexer.Layout(layoutPre,PosToken)
import Language.Haskell.Lexer.Position
import Data.List(mapAccumL)

default(Int)

-- | The function 'lexerPass1' handles the part of lexical analysis that
-- can be done independently of the parser---the tokenization and the
-- addition of the extra layout tokens \<n\> and {n}, as specified in
-- section 9.3 of the revised Haskell 98 Report.
lexerPass1 :: String -> [PosToken]
lexerPass1 :: String -> [PosToken]
lexerPass1 = [PosToken] -> [PosToken]
lexerPass1Only forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [PosToken]
lexerPass0

lexerPass1Only :: [PosToken] -> [PosToken]
lexerPass1Only :: [PosToken] -> [PosToken]
lexerPass1Only = [PosToken] -> [PosToken]
layoutPre forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PosToken] -> [PosToken]
rmSpace

-- | Remove token that are not meaningful (e.g., white space and comments).
rmSpace :: [PosToken] -> [PosToken]
rmSpace :: [PosToken] -> [PosToken]
rmSpace = forall a. (a -> Bool) -> [a] -> [a]
filter (Token -> Bool
notWhiteforall b c a. (b -> c) -> (a -> b) -> a -> c
.forall a b. (a, b) -> a
fst)

notWhite :: Token -> Bool
notWhite :: Token -> Bool
notWhite Token
t = Token
tforall a. Eq a => a -> a -> Bool
/=Token
Whitespace Bool -> Bool -> Bool
&&
             Token
tforall a. Eq a => a -> a -> Bool
/=Token
Commentstart Bool -> Bool -> Bool
&& Token
tforall a. Eq a => a -> a -> Bool
/=Token
Comment Bool -> Bool -> Bool
&&
             Token
tforall a. Eq a => a -> a -> Bool
/=Token
NestedComment

-- | Tokenize and add position information.  Preserves white space,
-- and does not insert extra tokens due to layout.
lexerPass0 :: String -> [PosToken]
lexerPass0 :: String -> [PosToken]
lexerPass0 = Pos -> String -> [PosToken]
lexerPass0' Pos
startPos

-- | Same as 'lexerPass0', except that it uses the given start position.
lexerPass0' :: Pos -> String -> [PosToken]
lexerPass0' :: Pos -> String -> [PosToken]
lexerPass0' Pos
pos0 = forall {a}. [(a, String)] -> [(a, (Pos, String))]
addPos forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [(Token, String)]
haskellLex forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
rmcr
  where
    addPos :: [(a, String)] -> [(a, (Pos, String))]
addPos = forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
mapAccumL forall {a}. Pos -> (a, String) -> (Pos, (a, (Pos, String)))
pos Pos
pos0

    pos :: Pos -> (a, String) -> (Pos, (a, (Pos, String)))
pos Pos
p (a
t,String
s) = {-seq p'-} (Pos
p',(a
t,(Pos
p,String
s)))
        where p' :: Pos
p' = Pos -> String -> Pos
nextPos Pos
p String
s
--      where s = reverse r


-- | Since #nextPos# examines one character at a time, it will increase the line
-- number by 2 if it sees \CR\LF, which can happen when reading DOS files on
-- a Unix like system.
-- Since the extra \CR characters can cause trouble later as well, we choose
-- to simply remove them here.
rmcr :: String -> String
rmcr :: String -> String
rmcr (Char
'\CR':Char
'\LF':String
s) = Char
'\LF'forall a. a -> [a] -> [a]
:String -> String
rmcr String
s
rmcr (Char
c:String
s) = Char
cforall a. a -> [a] -> [a]
:String -> String
rmcr String
s
rmcr String
"" = String
""