{-# LANGUAGE BangPatterns #-}
module Data.Attoparsec.Text.FastSet
(
FastSet
, fromList
, set
, member
, charClass
) where
import Data.Bits ((.|.), (.&.), shiftR)
import Data.Function (on)
import Data.List (sort, sortBy)
import qualified Data.Array.Base as AB
import qualified Data.Array.Unboxed as A
import qualified Data.Text as T
data FastSet = FastSet {
FastSet -> UArray Int Int
table :: {-# UNPACK #-} !(A.UArray Int Int)
, FastSet -> Int
mask :: {-# UNPACK #-} !Int
}
data Entry = Entry {
Entry -> Char
key :: {-# UNPACK #-} !Char
, Entry -> Int
initialIndex :: {-# UNPACK #-} !Int
, Entry -> Int
index :: {-# UNPACK #-} !Int
}
offset :: Entry -> Int
offset :: Entry -> Int
offset Entry
e = Entry -> Int
index Entry
e forall a. Num a => a -> a -> a
- Entry -> Int
initialIndex Entry
e
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions [] = []
resolveCollisions [Entry
e] = [Entry
e]
resolveCollisions (Entry
a:Entry
b:[Entry]
entries) = Entry
a' forall a. a -> [a] -> [a]
: [Entry] -> [Entry]
resolveCollisions (Entry
b' forall a. a -> [a] -> [a]
: [Entry]
entries)
where (Entry
a', Entry
b')
| Entry -> Int
index Entry
a forall a. Ord a => a -> a -> Bool
< Entry -> Int
index Entry
b = (Entry
a, Entry
b)
| Entry -> Int
offset Entry
a forall a. Ord a => a -> a -> Bool
< Entry -> Int
offset Entry
b = (Entry
b { index :: Int
index=Entry -> Int
index Entry
a }, Entry
a { index :: Int
index=Entry -> Int
index Entry
a forall a. Num a => a -> a -> a
+ Int
1 })
| Bool
otherwise = (Entry
a, Entry
b { index :: Int
index=Entry -> Int
index Entry
a forall a. Num a => a -> a -> a
+ Int
1 })
pad :: Int -> [Entry] -> [Entry]
pad :: Int -> [Entry] -> [Entry]
pad = Int -> Int -> [Entry] -> [Entry]
go Int
0
where
go :: Int -> Int -> [Entry] -> [Entry]
go !Int
_ !Int
m [] = forall a. Int -> a -> [a]
replicate (forall a. Ord a => a -> a -> a
max Int
1 Int
m forall a. Num a => a -> a -> a
+ Int
1) Entry
empty
go Int
k Int
m (Entry
e:[Entry]
entries) = forall a b. (a -> b) -> [a] -> [b]
map (forall a b. a -> b -> a
const Entry
empty) [Int
k..Int
i forall a. Num a => a -> a -> a
- Int
1] forall a. [a] -> [a] -> [a]
++ Entry
e forall a. a -> [a] -> [a]
:
Int -> Int -> [Entry] -> [Entry]
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) (Int
m forall a. Num a => a -> a -> a
+ Int
i forall a. Num a => a -> a -> a
- Int
k forall a. Num a => a -> a -> a
- Int
1) [Entry]
entries
where i :: Int
i = Entry -> Int
index Entry
e
empty :: Entry
empty = Char -> Int -> Int -> Entry
Entry Char
'\0' forall a. Bounded a => a
maxBound Int
0
nextPowerOf2 :: Int -> Int
nextPowerOf2 :: Int -> Int
nextPowerOf2 Int
0 = Int
1
nextPowerOf2 Int
x = forall {t}. (Num t, Bits t) => t -> Int -> t
go (Int
x forall a. Num a => a -> a -> a
- Int
1) Int
1
where go :: t -> Int -> t
go t
y Int
32 = t
y forall a. Num a => a -> a -> a
+ t
1
go t
y Int
k = t -> Int -> t
go (t
y forall a. Bits a => a -> a -> a
.|. (t
y forall a. Bits a => a -> Int -> a
`shiftR` Int
k)) forall a b. (a -> b) -> a -> b
$ Int
k forall a. Num a => a -> a -> a
* Int
2
fastHash :: Char -> Int
fastHash :: Char -> Int
fastHash = forall a. Enum a => a -> Int
fromEnum
fromList :: String -> FastSet
fromList :: String -> FastSet
fromList String
s = UArray Int Int -> Int -> FastSet
FastSet (forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
AB.listArray (Int
0, forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
interleaved forall a. Num a => a -> a -> a
- Int
1) [Int]
interleaved)
Int
mask'
where s' :: String
s' = forall a. Eq a => [a] -> [a]
ordNub (forall a. Ord a => [a] -> [a]
sort String
s)
l :: Int
l = forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s'
mask' :: Int
mask' = Int -> Int
nextPowerOf2 ((Int
5 forall a. Num a => a -> a -> a
* Int
l) forall a. Integral a => a -> a -> a
`div` Int
4) forall a. Num a => a -> a -> a
- Int
1
entries :: [Entry]
entries = Int -> [Entry] -> [Entry]
pad Int
mask' forall b c a. (b -> c) -> (a -> b) -> a -> c
.
[Entry] -> [Entry]
resolveCollisions forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Entry -> Int
initialIndex) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Char
c Int
i -> Char -> Int -> Int -> Entry
Entry Char
c Int
i Int
i) String
s' forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a b. (a -> b) -> [a] -> [b]
map ((forall a. Bits a => a -> a -> a
.&. Int
mask') forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
fastHash) forall a b. (a -> b) -> a -> b
$ String
s'
interleaved :: [Int]
interleaved = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Entry
e -> [forall a. Enum a => a -> Int
fromEnum forall a b. (a -> b) -> a -> b
$ Entry -> Char
key Entry
e, Entry -> Int
initialIndex Entry
e])
[Entry]
entries
ordNub :: Eq a => [a] -> [a]
ordNub :: forall a. Eq a => [a] -> [a]
ordNub [] = []
ordNub (a
y:[a]
ys) = forall {a}. Eq a => a -> [a] -> [a]
go a
y [a]
ys
where go :: a -> [a] -> [a]
go a
x (a
z:[a]
zs)
| a
x forall a. Eq a => a -> a -> Bool
== a
z = a -> [a] -> [a]
go a
x [a]
zs
| Bool
otherwise = a
x forall a. a -> [a] -> [a]
: a -> [a] -> [a]
go a
z [a]
zs
go a
x [] = [a
x]
set :: T.Text -> FastSet
set :: Text -> FastSet
set = String -> FastSet
fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
member :: Char -> FastSet -> Bool
member :: Char -> FastSet -> Bool
member Char
c FastSet
a = Int -> Bool
go (Int
2 forall a. Num a => a -> a -> a
* Int
i)
where i :: Int
i = Char -> Int
fastHash Char
c forall a. Bits a => a -> a -> a
.&. FastSet -> Int
mask FastSet
a
lookupAt :: Int -> Bool -> Bool
lookupAt Int
j Bool
b = (Int
i' forall a. Ord a => a -> a -> Bool
<= Int
i) Bool -> Bool -> Bool
&& (Char
c forall a. Eq a => a -> a -> Bool
== Char
c' Bool -> Bool -> Bool
|| Bool
b)
where c' :: Char
c' = forall a. Enum a => Int -> a
toEnum forall a b. (a -> b) -> a -> b
$ forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) Int
j
i' :: Int
i' = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) forall a b. (a -> b) -> a -> b
$ Int
j forall a. Num a => a -> a -> a
+ Int
1
go :: Int -> Bool
go Int
j = Int -> Bool -> Bool
lookupAt Int
j forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
2) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
4) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Int -> Bool -> Bool
lookupAt (Int
j forall a. Num a => a -> a -> a
+ Int
6) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool
go forall a b. (a -> b) -> a -> b
$ Int
j forall a. Num a => a -> a -> a
+ Int
8
charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = String -> FastSet
fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
go
where go :: String -> String
go (Char
a:Char
'-':Char
b:String
xs) = [Char
a..Char
b] forall a. [a] -> [a] -> [a]
++ String -> String
go String
xs
go (Char
x:String
xs) = Char
x forall a. a -> [a] -> [a]
: String -> String
go String
xs
go String
_ = String
""