{-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar.Read
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2009 Duncan Coutts,
--                    2011 Max Bolingbroke
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Read (read, FormatError(..)) where

import Codec.Archive.Tar.Types

import Data.Char     (ord)
import Data.Int      (Int64)
import Data.Bits     (Bits(shiftL))
import Control.Exception (Exception(..))
import Data.Typeable (Typeable)
import Control.Applicative
import Control.Monad
import Control.DeepSeq

import qualified Data.ByteString        as BS
import qualified Data.ByteString.Char8  as BS.Char8
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy   as LBS

import Prelude hiding (read)

#if !MIN_VERSION_bytestring(0,10,0)
import Data.Monoid (Monoid(..))
import qualified Data.ByteString.Lazy.Internal as LBS
#endif

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
  = TruncatedArchive
  | ShortTrailer
  | BadTrailer
  | TrailingJunk
  | ChecksumIncorrect
  | NotTarFormat
  | UnrecognisedTarFormat
  | HeaderBadNumericEncoding
#if MIN_VERSION_base(4,8,0)
  deriving (FormatError -> FormatError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FormatError -> FormatError -> Bool
$c/= :: FormatError -> FormatError -> Bool
== :: FormatError -> FormatError -> Bool
$c== :: FormatError -> FormatError -> Bool
Eq, Int -> FormatError -> ShowS
[FormatError] -> ShowS
FormatError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FormatError] -> ShowS
$cshowList :: [FormatError] -> ShowS
show :: FormatError -> String
$cshow :: FormatError -> String
showsPrec :: Int -> FormatError -> ShowS
$cshowsPrec :: Int -> FormatError -> ShowS
Show, Typeable)

instance Exception FormatError where
  displayException :: FormatError -> String
displayException FormatError
TruncatedArchive         = String
"truncated tar archive"
  displayException FormatError
ShortTrailer             = String
"short tar trailer"
  displayException FormatError
BadTrailer               = String
"bad tar trailer"
  displayException FormatError
TrailingJunk             = String
"tar file has trailing junk"
  displayException FormatError
ChecksumIncorrect        = String
"tar checksum error"
  displayException FormatError
NotTarFormat             = String
"data is not in tar format"
  displayException FormatError
UnrecognisedTarFormat    = String
"tar entry not in a recognised format"
  displayException FormatError
HeaderBadNumericEncoding = String
"tar header is malformed (bad numeric encoding)"
#else
  deriving (Eq, Typeable)

instance Show FormatError where
  show TruncatedArchive         = "truncated tar archive"
  show ShortTrailer             = "short tar trailer"
  show BadTrailer               = "bad tar trailer"
  show TrailingJunk             = "tar file has trailing junk"
  show ChecksumIncorrect        = "tar checksum error"
  show NotTarFormat             = "data is not in tar format"
  show UnrecognisedTarFormat    = "tar entry not in a recognised format"
  show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"

instance Exception FormatError
#endif

instance NFData    FormatError where
  rnf :: FormatError -> ()
rnf !FormatError
_ = () -- enumerations are fully strict by construction

-- | Convert a data stream in the tar file format into an internal data
-- structure. Decoding errors are reported by the 'Fail' constructor of the
-- 'Entries' type.
--
-- * The conversion is done lazily.
--
read :: LBS.ByteString -> Entries FormatError
read :: ByteString -> Entries FormatError
read = forall a e. (a -> Either e (Maybe (Entry, a))) -> a -> Entries e
unfoldEntries ByteString -> Either FormatError (Maybe (Entry, ByteString))
getEntry

getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString))
getEntry :: ByteString -> Either FormatError (Maybe (Entry, ByteString))
getEntry ByteString
bs
  | ByteString -> Int
BS.length ByteString
header forall a. Ord a => a -> a -> Bool
< Int
512 = forall a b. a -> Either a b
Left FormatError
TruncatedArchive

  -- Tar files end with at least two blocks of all '0'. Checking this serves
  -- two purposes. It checks the format but also forces the tail of the data
  -- which is necessary to close the file if it came from a lazily read file.
  | HasCallStack => ByteString -> Word8
LBS.head ByteString
bs forall a. Eq a => a -> a -> Bool
== Word8
0 = case Int64 -> ByteString -> (ByteString, ByteString)
LBS.splitAt Int64
1024 ByteString
bs of
      (ByteString
end, ByteString
trailing)
        | ByteString -> Int64
LBS.length ByteString
end forall a. Eq a => a -> a -> Bool
/= Int64
1024        -> forall a b. a -> Either a b
Left FormatError
ShortTrailer
        | Bool -> Bool
not ((Word8 -> Bool) -> ByteString -> Bool
LBS.all (forall a. Eq a => a -> a -> Bool
== Word8
0) ByteString
end)      -> forall a b. a -> Either a b
Left FormatError
BadTrailer
        | Bool -> Bool
not ((Word8 -> Bool) -> ByteString -> Bool
LBS.all (forall a. Eq a => a -> a -> Bool
== Word8
0) ByteString
trailing) -> forall a b. a -> Either a b
Left FormatError
TrailingJunk
        | Bool
otherwise                     -> forall a b. b -> Either a b
Right forall a. Maybe a
Nothing

  | Bool
otherwise  = forall e a. Partial e a -> Either e a
partial forall a b. (a -> b) -> a -> b
$ do

  case (Partial FormatError Int
chksum_, Partial FormatError Format
format_) of
    (Ok Int
chksum, Partial FormatError Format
_   ) | ByteString -> Int -> Bool
correctChecksum ByteString
header Int
chksum -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
    (Ok Int
_,      Ok Format
_) -> forall e a. e -> Partial e a
Error FormatError
ChecksumIncorrect
    (Partial FormatError Int, Partial FormatError Format)
_                 -> forall e a. e -> Partial e a
Error FormatError
NotTarFormat

  -- These fields are partial, have to check them
  Format
format   <- Partial FormatError Format
format_;   Permissions
mode     <- Partial FormatError Permissions
mode_;
  Int
uid      <- Partial FormatError Int
uid_;      Int
gid      <- Partial FormatError Int
gid_;
  Int64
size     <- Partial FormatError Int64
size_;     Int64
mtime    <- Partial FormatError Int64
mtime_;
  Int
devmajor <- Partial FormatError Int
devmajor_; Int
devminor <- Partial FormatError Int
devminor_;

  let content :: ByteString
content = Int64 -> ByteString -> ByteString
LBS.take Int64
size (Int64 -> ByteString -> ByteString
LBS.drop Int64
512 ByteString
bs)
      padding :: Int64
padding = (Int64
512 forall a. Num a => a -> a -> a
- Int64
size) forall a. Integral a => a -> a -> a
`mod` Int64
512
      bs' :: ByteString
bs'     = Int64 -> ByteString -> ByteString
LBS.drop (Int64
512 forall a. Num a => a -> a -> a
+ Int64
size forall a. Num a => a -> a -> a
+ Int64
padding) ByteString
bs

      entry :: Entry
entry = Entry {
        entryTarPath :: TarPath
entryTarPath     = ByteString -> ByteString -> TarPath
TarPath ByteString
name ByteString
prefix,
        entryContent :: EntryContent
entryContent     = case Char
typecode of
                   Char
'\0' -> ByteString -> Int64 -> EntryContent
NormalFile      ByteString
content Int64
size
                   Char
'0'  -> ByteString -> Int64 -> EntryContent
NormalFile      ByteString
content Int64
size
                   Char
'1'  -> LinkTarget -> EntryContent
HardLink        (ByteString -> LinkTarget
LinkTarget ByteString
linkname)
                   Char
'2'  -> LinkTarget -> EntryContent
SymbolicLink    (ByteString -> LinkTarget
LinkTarget ByteString
linkname)
                   Char
_ | Format
format forall a. Eq a => a -> a -> Bool
== Format
V7Format
                        -> Char -> ByteString -> Int64 -> EntryContent
OtherEntryType  Char
typecode ByteString
content Int64
size
                   Char
'3'  -> Int -> Int -> EntryContent
CharacterDevice Int
devmajor Int
devminor
                   Char
'4'  -> Int -> Int -> EntryContent
BlockDevice     Int
devmajor Int
devminor
                   Char
'5'  -> EntryContent
Directory
                   Char
'6'  -> EntryContent
NamedPipe
                   Char
'7'  -> ByteString -> Int64 -> EntryContent
NormalFile      ByteString
content Int64
size
                   Char
_    -> Char -> ByteString -> Int64 -> EntryContent
OtherEntryType  Char
typecode ByteString
content Int64
size,
        entryPermissions :: Permissions
entryPermissions = Permissions
mode,
        entryOwnership :: Ownership
entryOwnership   = String -> String -> Int -> Int -> Ownership
Ownership (ByteString -> String
BS.Char8.unpack ByteString
uname)
                                     (ByteString -> String
BS.Char8.unpack ByteString
gname) Int
uid Int
gid,
        entryTime :: Int64
entryTime        = Int64
mtime,
        entryFormat :: Format
entryFormat      = Format
format
    }

  forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just (Entry
entry, ByteString
bs'))

  where
#if MIN_VERSION_bytestring(0,10,0)
   header :: ByteString
header = ByteString -> ByteString
LBS.toStrict (Int64 -> ByteString -> ByteString
LBS.take Int64
512 ByteString
bs)
#else
   header = toStrict (LBS.take 512 bs)
   toStrict = LBS.foldrChunks mappend mempty
#endif

   name :: ByteString
name       = Int -> Int -> ByteString -> ByteString
getString   Int
0 Int
100 ByteString
header
   mode_ :: Partial FormatError Permissions
mode_      = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
100   Int
8 ByteString
header
   uid_ :: Partial FormatError Int
uid_       = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
108   Int
8 ByteString
header
   gid_ :: Partial FormatError Int
gid_       = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
116   Int
8 ByteString
header
   size_ :: Partial FormatError Int64
size_      = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
124  Int
12 ByteString
header
   mtime_ :: Partial FormatError Int64
mtime_     = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
136  Int
12 ByteString
header
   chksum_ :: Partial FormatError Int
chksum_    = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
148   Int
8 ByteString
header
   typecode :: Char
typecode   = Int -> ByteString -> Char
getByte   Int
156     ByteString
header
   linkname :: ByteString
linkname   = Int -> Int -> ByteString -> ByteString
getString Int
157 Int
100 ByteString
header
   magic :: ByteString
magic      = Int -> Int -> ByteString -> ByteString
getChars  Int
257   Int
8 ByteString
header
   uname :: ByteString
uname      = Int -> Int -> ByteString -> ByteString
getString Int
265  Int
32 ByteString
header
   gname :: ByteString
gname      = Int -> Int -> ByteString -> ByteString
getString Int
297  Int
32 ByteString
header
   devmajor_ :: Partial FormatError Int
devmajor_  = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
329   Int
8 ByteString
header
   devminor_ :: Partial FormatError Int
devminor_  = forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct    Int
337   Int
8 ByteString
header
   prefix :: ByteString
prefix     = Int -> Int -> ByteString -> ByteString
getString Int
345 Int
155 ByteString
header
-- trailing   = getBytes  500  12 header

   format_ :: Partial FormatError Format
format_
     | ByteString
magic forall a. Eq a => a -> a -> Bool
== ByteString
ustarMagic = forall (m :: * -> *) a. Monad m => a -> m a
return Format
UstarFormat
     | ByteString
magic forall a. Eq a => a -> a -> Bool
== ByteString
gnuMagic   = forall (m :: * -> *) a. Monad m => a -> m a
return Format
GnuFormat
     | ByteString
magic forall a. Eq a => a -> a -> Bool
== ByteString
v7Magic    = forall (m :: * -> *) a. Monad m => a -> m a
return Format
V7Format
     | Bool
otherwise           = forall e a. e -> Partial e a
Error FormatError
UnrecognisedTarFormat

v7Magic, ustarMagic, gnuMagic :: BS.ByteString
v7Magic :: ByteString
v7Magic    = String -> ByteString
BS.Char8.pack String
"\0\0\0\0\0\0\0\0"
ustarMagic :: ByteString
ustarMagic = String -> ByteString
BS.Char8.pack String
"ustar\NUL00"
gnuMagic :: ByteString
gnuMagic   = String -> ByteString
BS.Char8.pack String
"ustar  \NUL"

correctChecksum :: BS.ByteString -> Int -> Bool
correctChecksum :: ByteString -> Int -> Bool
correctChecksum ByteString
header Int
checksum = Int
checksum forall a. Eq a => a -> a -> Bool
== Int
checksum'
  where
    -- sum of all 512 bytes in the header block,
    -- treating each byte as an 8-bit unsigned value
    sumchars :: ByteString -> Int
sumchars  = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' (\Int
x Word8
y -> Int
x forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
y) Int
0
    -- treating the 8 bytes of chksum as blank characters.
    checksum' :: Int
checksum' = ByteString -> Int
sumchars (Int -> ByteString -> ByteString
BS.take Int
148 ByteString
header)
              forall a. Num a => a -> a -> a
+ Int
256 -- 256 = sumchars (BS.Char8.replicate 8 ' ')
              forall a. Num a => a -> a -> a
+ ByteString -> Int
sumchars (Int -> ByteString -> ByteString
BS.drop Int
156 ByteString
header)

-- * TAR format primitive input

{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int   #-}
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int64 #-}
getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Partial FormatError a
getOct :: forall a.
(Integral a, Bits a) =>
Int -> Int -> ByteString -> Partial FormatError a
getOct Int
off Int
len = forall {a}.
(Integral a, Bits a) =>
ByteString -> Partial FormatError a
parseOct
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.Char8.takeWhile (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\NUL' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
' ')
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.Char8.dropWhile (forall a. Eq a => a -> a -> Bool
== Char
' ')
               forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len
  where
    parseOct :: ByteString -> Partial FormatError a
parseOct ByteString
s | ByteString -> Bool
BS.null ByteString
s = forall (m :: * -> *) a. Monad m => a -> m a
return a
0
    -- As a star extension, octal fields can hold a base-256 value if the high
    -- bit of the initial character is set. The initial character can be:
    --   0x80 ==> trailing characters hold a positive base-256 value
    --   0xFF ==> trailing characters hold a negative base-256 value
    --
    -- In both cases, there won't be a trailing NUL/space.
    --
    -- GNU tar seems to contain a half-implementation of code that deals with
    -- extra bits in the first character, but I don't think it works and the
    -- docs I can find on star seem to suggest that these will always be 0,
    -- which is what I will assume.
    parseOct ByteString
s | HasCallStack => ByteString -> Word8
BS.head ByteString
s forall a. Eq a => a -> a -> Bool
== Word8
128 = forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. (Integral a, Bits a) => ByteString -> a
readBytes (HasCallStack => ByteString -> ByteString
BS.tail ByteString
s))
               | HasCallStack => ByteString -> Word8
BS.head ByteString
s forall a. Eq a => a -> a -> Bool
== Word8
255 = forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Num a => a -> a
negate (forall a. (Integral a, Bits a) => ByteString -> a
readBytes (HasCallStack => ByteString -> ByteString
BS.tail ByteString
s)))
    parseOct ByteString
s  = case forall n. Integral n => ByteString -> Maybe n
readOct ByteString
s of
      Just a
x  -> forall (m :: * -> *) a. Monad m => a -> m a
return a
x
      Maybe a
Nothing -> forall e a. e -> Partial e a
Error FormatError
HeaderBadNumericEncoding

    readBytes :: (Integral a, Bits a) => BS.ByteString -> a
    readBytes :: forall a. (Integral a, Bits a) => ByteString -> a
readBytes = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' (\a
acc Word8
x -> a
acc forall a. Bits a => a -> Int -> a
`shiftL` Int
8 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x) a
0

getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString
getBytes :: Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len = Int -> ByteString -> ByteString
BS.take Int
len forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
BS.drop Int
off

getByte :: Int -> BS.ByteString -> Char
getByte :: Int -> ByteString -> Char
getByte Int
off ByteString
bs = ByteString -> Int -> Char
BS.Char8.index ByteString
bs Int
off

getChars :: Int -> Int -> BS.ByteString -> BS.ByteString
getChars :: Int -> Int -> ByteString -> ByteString
getChars Int
off Int
len = Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len

getString :: Int -> Int -> BS.ByteString -> BS.ByteString
getString :: Int -> Int -> ByteString -> ByteString
getString Int
off Int
len = ByteString -> ByteString
BS.copy forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.Char8.takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
'\0') forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ByteString -> ByteString
getBytes Int
off Int
len

-- These days we'd just use Either, but in older versions of base there was no
-- Monad instance for Either, it was in mtl with an anoying Error constraint.
--
data Partial e a = Error e | Ok a

partial :: Partial e a -> Either e a
partial :: forall e a. Partial e a -> Either e a
partial (Error e
msg) = forall a b. a -> Either a b
Left e
msg
partial (Ok a
x)      = forall a b. b -> Either a b
Right a
x

instance Functor (Partial e) where
    fmap :: forall a b. (a -> b) -> Partial e a -> Partial e b
fmap = forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM

instance Applicative (Partial e) where
    pure :: forall a. a -> Partial e a
pure  = forall e a. a -> Partial e a
Ok
    <*> :: forall a b. Partial e (a -> b) -> Partial e a -> Partial e b
(<*>) = forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap

instance Monad (Partial e) where
    return :: forall a. a -> Partial e a
return        = forall (f :: * -> *) a. Applicative f => a -> f a
pure
    Error e
m >>= :: forall a b. Partial e a -> (a -> Partial e b) -> Partial e b
>>= a -> Partial e b
_ = forall e a. e -> Partial e a
Error e
m
    Ok    a
x >>= a -> Partial e b
k = a -> Partial e b
k a
x
#if !MIN_VERSION_base(4,13,0)
    fail          = error "fail @(Partial e)"
#endif

{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int   #-}
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-}
readOct :: Integral n => BS.ByteString -> Maybe n
readOct :: forall n. Integral n => ByteString -> Maybe n
readOct ByteString
bs0 = case forall n. Integral n => Int -> n -> ByteString -> n
go Int
0 n
0 ByteString
bs0 of
                -1 -> forall a. Maybe a
Nothing
                n
n  -> forall a. a -> Maybe a
Just n
n
  where
    go :: Integral n => Int -> n -> BS.ByteString -> n
    go :: forall n. Integral n => Int -> n -> ByteString -> n
go !Int
i !n
n !ByteString
bs
      | ByteString -> Bool
BS.null ByteString
bs = if Int
i forall a. Eq a => a -> a -> Bool
== Int
0 then -n
1 else n
n
      | Bool
otherwise  =
          case ByteString -> Word8
BS.unsafeHead ByteString
bs of
            Word8
w | Word8
w forall a. Ord a => a -> a -> Bool
>= Word8
0x30
             Bool -> Bool -> Bool
&& Word8
w forall a. Ord a => a -> a -> Bool
<= Word8
0x39 -> forall n. Integral n => Int -> n -> ByteString -> n
go (Int
iforall a. Num a => a -> a -> a
+Int
1)
                                (n
n forall a. Num a => a -> a -> a
* n
8 forall a. Num a => a -> a -> a
+ (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w forall a. Num a => a -> a -> a
- n
0x30))
                                (ByteString -> ByteString
BS.unsafeTail ByteString
bs)
              | Bool
otherwise -> -n
1