{-# LANGUAGE BangPatterns #-}

------------------------------------------------------------------------------
-- |
-- Module      :  Data.Attoparsec.FastSet
-- Copyright   :  Felipe Lessa 2010, Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  felipe.lessa@gmail.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Fast set membership tests for 'Char' values. We test for
-- membership using a hashtable implemented with Robin Hood
-- collision resolution. The set representation is unboxed,
-- and the characters and hashes interleaved, for efficiency.
--
--
-----------------------------------------------------------------------------
module Data.Attoparsec.Text.FastSet
    (
    -- * Data type
      FastSet
    -- * Construction
    , fromList
    , set
    -- * Lookup
    , member
    -- * Handy interface
    , charClass
    ) where

import Data.Bits ((.|.), (.&.), shiftR)
import Data.Function (on)
import Data.List (sort, sortBy)
import qualified Data.Array.Base as AB
import qualified Data.Array.Unboxed as A
import qualified Data.Text as T

data FastSet = FastSet {
    FastSet -> UArray Int Int
table :: {-# UNPACK #-} !(A.UArray Int Int)
  , FastSet -> Int
mask  :: {-# UNPACK #-} !Int
  }

data Entry = Entry {
    Entry -> Char
key          :: {-# UNPACK #-} !Char
  , Entry -> Int
initialIndex :: {-# UNPACK #-} !Int
  , Entry -> Int
index        :: {-# UNPACK #-} !Int
  }

offset :: Entry -> Int
offset :: Entry -> Int
offset Entry
e = Entry -> Int
index Entry
e forall a. Num a => a -> a -> a
- Entry -> Int
initialIndex Entry
e

resolveCollisions :: [Entry] -> [Entry]
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions [] = []
resolveCollisions [Entry
e] = [Entry
e]
resolveCollisions (Entry
a:Entry
b:[Entry]
entries) = Entry
a' forall a. a -> [a] -> [a]
: [Entry] -> [Entry]
resolveCollisions (Entry
b' forall a. a -> [a] -> [a]
: [Entry]
entries)
  where (Entry
a', Entry
b')
          | Entry -> Int
index Entry
a forall a. Ord a => a -> a -> Bool
< Entry -> Int
index Entry
b   = (Entry
a, Entry
b)
          | Entry -> Int
offset Entry
a forall a. Ord a => a -> a -> Bool
< Entry -> Int
offset Entry
b = (Entry
b { index :: Int
index=Entry -> Int
index Entry
a }, Entry
a { index :: Int
index=Entry -> Int
index Entry
a forall a. Num a => a -> a -> a
+ Int
1 })
          | Bool
otherwise           = (Entry
a, Entry
b { index :: Int
index=Entry -> Int
index Entry
a forall a. Num a => a -> a -> a
+ Int
1 })

pad :: Int -> [Entry] -> [Entry]
pad :: Int -> [Entry] -> [Entry]
pad = Int -> Int -> [Entry] -> [Entry]
go Int
0
  where -- ensure that we pad enough so that lookups beyond the
        -- last hash in the table fall within the array
        go :: Int -> Int -> [Entry] -> [Entry]
go !Int
_ !Int
m []          = forall a. Int -> a -> [a]
replicate (forall a. Ord a => a -> a -> a
max Int
1 Int
m forall a. Num a => a -> a -> a
+ Int
1) Entry
empty
        go  Int
k  Int
m (Entry
e:[Entry]
entries) = forall a b. (a -> b) -> [a] -> [b]
map (forall a b. a -> b -> a
const Entry
empty) [Int
k..Int
i forall a. Num a => a -> a -> a
- Int
1] forall a. [a] -> [a] -> [a]
++ Entry
e forall a. a -> [a] -> [a]
:
                               Int -> Int -> [Entry] -> [Entry]
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) (Int
m forall a. Num a => a -> a -> a
+ Int
i forall a. Num a => a -> a -> a
- Int
k forall a. Num a => a -> a -> a
- Int
1) [Entry]
entries
          where i :: Int
i            = Entry -> Int
index Entry
e
        empty :: Entry
empty                = Char -> Int -> Int -> Entry
Entry Char
'\0' forall a. Bounded a => a
maxBound Int
0

nextPowerOf2 :: Int -> Int
nextPowerOf2 :: Int -> Int
nextPowerOf2 Int
0  = Int
1
nextPowerOf2 Int
x  = forall {t}. (Num t, Bits t) => t -> Int -> t
go (Int
x forall a. Num a => a -> a -> a
- Int
1) Int
1
  where go :: t -> Int -> t
go t
y Int
32 = t
y forall a. Num a => a -> a -> a
+ t
1
        go t
y Int
k  = t -> Int -> t
go (t
y forall a. Bits a => a -> a -> a
.|. (t
y forall a. Bits a => a -> Int -> a
`shiftR` Int
k)) forall a b. (a -> b) -> a -> b
$ Int
k forall a. Num a => a -> a -> a
* Int
2

fastHash :: Char -> Int
fastHash :: Char -> Int
fastHash = forall a. Enum a => a -> Int
fromEnum

fromList :: String -> FastSet
fromList :: String -> FastSet
fromList String
s = UArray Int Int -> Int -> FastSet
FastSet (forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
AB.listArray (Int
0, forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
interleaved forall a. Num a => a -> a -> a
- Int
1) [Int]
interleaved)
             Int
mask'
  where s' :: String
s'      = forall a. Eq a => [a] -> [a]
ordNub (forall a. Ord a => [a] -> [a]
sort String
s)
        l :: Int
l       = forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s'
        mask' :: Int
mask'   = Int -> Int
nextPowerOf2 ((Int
5 forall a. Num a => a -> a -> a
* Int
l) forall a. Integral a => a -> a -> a
`div` Int
4) forall a. Num a => a -> a -> a
- Int
1
        entries :: [Entry]
entries = Int -> [Entry] -> [Entry]
pad Int
mask' forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  [Entry] -> [Entry]
resolveCollisions forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Entry -> Int
initialIndex) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Char
c Int
i -> Char -> Int -> Int -> Entry
Entry Char
c Int
i Int
i) String
s' forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  forall a b. (a -> b) -> [a] -> [b]
map ((forall a. Bits a => a -> a -> a
.&. Int
mask') forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
fastHash) forall a b. (a -> b) -> a -> b
$ String
s'
        interleaved :: [Int]
interleaved = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Entry
e -> [forall a. Enum a => a -> Int
fromEnum forall a b. (a -> b) -> a -> b
$ Entry -> Char
key Entry
e, Entry -> Int
initialIndex Entry
e])
                      [Entry]
entries

ordNub :: Eq a => [a] -> [a]
ordNub :: forall a. Eq a => [a] -> [a]
ordNub []     = []
ordNub (a
y:[a]
ys) = forall {a}. Eq a => a -> [a] -> [a]
go a
y [a]
ys
  where go :: a -> [a] -> [a]
go a
x (a
z:[a]
zs)
          | a
x forall a. Eq a => a -> a -> Bool
== a
z    = a -> [a] -> [a]
go a
x [a]
zs
          | Bool
otherwise = a
x forall a. a -> [a] -> [a]
: a -> [a] -> [a]
go a
z [a]
zs
        go a
x []       = [a
x]

set :: T.Text -> FastSet
set :: Text -> FastSet
set = String -> FastSet
fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack

-- | Check the set for membership.
member :: Char -> FastSet -> Bool
member :: Char -> FastSet -> Bool
member Char
c FastSet
a           = Int -> Bool
go (Int
2 forall a. Num a => a -> a -> a
* Int
i)
  where i :: Int
i            = Char -> Int
fastHash Char
c forall a. Bits a => a -> a -> a
.&. FastSet -> Int
mask FastSet
a
        lookupAt :: Int -> Bool -> Bool
lookupAt Int
j Bool
b = (Int
i' forall a. Ord a => a -> a -> Bool
<= Int
i) Bool -> Bool -> Bool
&& (Char
c forall a. Eq a => a -> a -> Bool
== Char
c' Bool -> Bool -> Bool
|| Bool
b)
            where c' :: Char
c' = forall a. Enum a => Int -> a
toEnum forall a b. (a -> b) -> a -> b
$ forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) Int
j
                  i' :: Int
i' = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) forall a b. (a -> b) -> a -> b
$ Int
j forall a. Num a => a -> a -> a
+ Int
1
        go :: Int -> Bool
go Int
j         = Int -> Bool -> Bool
lookupAt Int
j forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
2) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
4) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                       Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
6) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool
go forall a b. (a -> b) -> a -> b
$ Int
j forall a. Num a => a -> a -> a
+ Int
8

charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = String -> FastSet
fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
go
  where go :: String -> String
go (Char
a:Char
'-':Char
b:String
xs) = [Char
a..Char
b] forall a. [a] -> [a] -> [a]
++ String -> String
go String
xs
        go (Char
x:String
xs)       = Char
x forall a. a -> [a] -> [a]
: String -> String
go String
xs
        go String
_            = String
""