From 2e3e945fca93338a487d07ba0f7d760c244a7fb6 Mon Sep 17 00:00:00 2001 From: David King Date: Mon, 18 Apr 2022 08:23:24 +0100 Subject: [PATCH 1/2] Revert "unarr: Remove obsolete unarr copy/paste" This reverts commit d97fdcda6ea210cf00329d5887287acd33836114. --- cut-n-paste/meson.build | 1 + cut-n-paste/unarr/AUTHORS | 12 + cut-n-paste/unarr/COPYING | 165 ++++ cut-n-paste/unarr/common/allocator.h | 29 + cut-n-paste/unarr/common/conv.c | 96 ++ cut-n-paste/unarr/common/crc32.c | 51 + cut-n-paste/unarr/common/stream.c | 217 +++++ cut-n-paste/unarr/common/unarr-imp.h | 81 ++ cut-n-paste/unarr/common/unarr.c | 110 +++ cut-n-paste/unarr/lzmasdk/7zTypes.h | 525 ++++++++++ cut-n-paste/unarr/lzmasdk/CpuArch.c | 478 ++++++++++ cut-n-paste/unarr/lzmasdk/CpuArch.h | 442 +++++++++ cut-n-paste/unarr/lzmasdk/Ppmd.h | 167 ++++ cut-n-paste/unarr/lzmasdk/Ppmd7.c | 1104 ++++++++++++++++++++++ cut-n-paste/unarr/lzmasdk/Ppmd7.h | 181 ++++ cut-n-paste/unarr/lzmasdk/Ppmd7Dec.c | 297 ++++++ cut-n-paste/unarr/lzmasdk/Precomp.h | 10 + cut-n-paste/unarr/lzmasdk/README.lzmasdk | 15 + cut-n-paste/unarr/meson.build | 28 + cut-n-paste/unarr/rar/filter-rar.c | 704 ++++++++++++++ cut-n-paste/unarr/rar/huffman-rar.c | 142 +++ cut-n-paste/unarr/rar/lzss.h | 88 ++ cut-n-paste/unarr/rar/parse-rar.c | 236 +++++ cut-n-paste/unarr/rar/rar.c | 254 +++++ cut-n-paste/unarr/rar/rar.h | 252 +++++ cut-n-paste/unarr/rar/rarvm.c | 616 ++++++++++++ cut-n-paste/unarr/rar/rarvm.h | 117 +++ cut-n-paste/unarr/rar/uncompress-rar.c | 1000 ++++++++++++++++++++ cut-n-paste/unarr/unarr.h | 103 ++ po/POTFILES.skip | 1 + 30 files changed, 7522 insertions(+) create mode 100644 cut-n-paste/unarr/AUTHORS create mode 100644 cut-n-paste/unarr/COPYING create mode 100644 cut-n-paste/unarr/common/allocator.h create mode 100644 cut-n-paste/unarr/common/conv.c create mode 100644 cut-n-paste/unarr/common/crc32.c create mode 100644 cut-n-paste/unarr/common/stream.c create mode 100644 cut-n-paste/unarr/common/unarr-imp.h create mode 
100644 cut-n-paste/unarr/common/unarr.c create mode 100644 cut-n-paste/unarr/lzmasdk/7zTypes.h create mode 100644 cut-n-paste/unarr/lzmasdk/CpuArch.c create mode 100644 cut-n-paste/unarr/lzmasdk/CpuArch.h create mode 100644 cut-n-paste/unarr/lzmasdk/Ppmd.h create mode 100644 cut-n-paste/unarr/lzmasdk/Ppmd7.c create mode 100644 cut-n-paste/unarr/lzmasdk/Ppmd7.h create mode 100644 cut-n-paste/unarr/lzmasdk/Ppmd7Dec.c create mode 100644 cut-n-paste/unarr/lzmasdk/Precomp.h create mode 100644 cut-n-paste/unarr/lzmasdk/README.lzmasdk create mode 100644 cut-n-paste/unarr/meson.build create mode 100644 cut-n-paste/unarr/rar/filter-rar.c create mode 100644 cut-n-paste/unarr/rar/huffman-rar.c create mode 100644 cut-n-paste/unarr/rar/lzss.h create mode 100644 cut-n-paste/unarr/rar/parse-rar.c create mode 100644 cut-n-paste/unarr/rar/rar.c create mode 100644 cut-n-paste/unarr/rar/rar.h create mode 100644 cut-n-paste/unarr/rar/rarvm.c create mode 100644 cut-n-paste/unarr/rar/rarvm.h create mode 100644 cut-n-paste/unarr/rar/uncompress-rar.c create mode 100644 cut-n-paste/unarr/unarr.h diff --git a/cut-n-paste/meson.build b/cut-n-paste/meson.build index 279061d2..0f4dbd30 100644 --- a/cut-n-paste/meson.build +++ b/cut-n-paste/meson.build @@ -3,6 +3,7 @@ cut_n_paste_inc = include_directories('.') subdir('gimpcellrenderertoggle') subdir('libdazzle') subdir('libgd') +subdir('unarr') if not external_synctex subdir('synctex') diff --git a/cut-n-paste/unarr/AUTHORS b/cut-n-paste/unarr/AUTHORS new file mode 100644 index 00000000..4af1be7c --- /dev/null +++ b/cut-n-paste/unarr/AUTHORS @@ -0,0 +1,12 @@ +unarr contains code by: + +* The Unarchiver project (https://code.google.com/p/theunarchiver/) +* Simon Bünzli (zeniko at gmail.com, http://www.zeniko.ch/#SumatraPDF) + +Most code is licensed under LGPLv3 (see COPYING). 
Exceptions are in code +included from other projects: + +Files License URL +---------------------------------------------------------------------------------- +common/crc32.c Public Domain https://gnunet.org/svn/gnunet/src/util/crypto_crc.c +lzmasdk/*.* Public Domain http://www.7-zip.org/sdk.html diff --git a/cut-n-paste/unarr/COPYING b/cut-n-paste/unarr/COPYING new file mode 100644 index 00000000..65c5ca88 --- /dev/null +++ b/cut-n-paste/unarr/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. 
+ + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/cut-n-paste/unarr/common/allocator.h b/cut-n-paste/unarr/common/allocator.h
new file mode 100644
index 00000000..41199c80
--- /dev/null
+++ b/cut-n-paste/unarr/common/allocator.h
@@ -0,0 +1,29 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#ifndef common_allocator_h
+#define common_allocator_h
+
+#ifdef USE_CUSTOM_ALLOCATOR
+/* Custom-allocator build: the macros below redirect malloc/calloc/free to ar_malloc/ar_calloc/ar_free. */
+#include <stddef.h>
+
+typedef void *(* custom_malloc_fn)(void *opaque, size_t size);
+typedef void (* custom_free_fn)(void *opaque, void *ptr);
+/* Sets the allocation callbacks together with a caller-supplied opaque pointer. */
+void ar_set_custom_allocator(custom_malloc_fn custom_malloc, custom_free_fn custom_free, void *opaque);
+
+#define malloc(size) ar_malloc(size)
+#define calloc(count, size) ar_calloc(count, size)
+#define free(ptr) ar_free(ptr)
+/* realloc/strdup expand to names this header never declares, apparently so that stray uses fail to build. */
+#define realloc(ptr, size) _use_malloc_memcpy_free_instead(ptr, size)
+#define strdup(str) _use_malloc_memcpy_instead(str)
+
+#elif !defined(NDEBUG) && defined(_MSC_VER)
+
+#include <crtdbg.h>
+
+#endif
+
+#endif
diff --git a/cut-n-paste/unarr/common/conv.c b/cut-n-paste/unarr/common/conv.c
new file mode 100644
index 00000000..4398539b
--- /dev/null
+++ b/cut-n-paste/unarr/common/conv.c
@@ -0,0 +1,96 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#include "unarr-imp.h"
+
+#include <time.h>
+
+/* data from http://en.wikipedia.org/wiki/Cp437 */
+static const wchar_t gCp437[256] = {
+    0, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266C, 0x263C,
+    0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC,
+    ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
+    '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
+    '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 0x2302,
+    0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+    0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192,
+    0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
+    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
+    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
+    0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
+    0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0,
+};
+/* Writes `rune` as 1-3 UTF-8 bytes to `out` (no NUL) and returns the byte count: 0 if size is 0, U+FFFD for surrogates and runes >= 0x10000, '?' if the encoding does not fit in `size`. */
+size_t ar_conv_rune_to_utf8(wchar_t rune, char *out, size_t size)
+{
+    if (size < 1)
+        return 0;
+    if (rune < 0x0080) {
+        *out++ = rune & 0x7F;
+        return 1;
+    }
+    if (rune < 0x0800 && size >= 2) {
+        *out++ = 0xC0 | ((rune >> 6) & 0x1F);
+        *out++ = 0x80 | (rune & 0x3F);
+        return 2;
+    }
+    if (size >= 3) {
+        if ((0xD800 <= rune && rune <= 0xDFFF) || rune >= 0x10000)
+            rune = 0xFFFD;
+        *out++ = 0xE0 | ((rune >> 12) & 0x0F);
+        *out++ = 0x80 | ((rune >> 6) & 0x3F);
+        *out++ = 0x80 | (rune & 0x3F);
+        return 3;
+    }
+    *out++ = '?';
+    return 1;
+}
+/* Converts a NUL-terminated CP437 string to a newly malloc'ed, NUL-terminated UTF-8 string (caller frees); NULL if allocation fails. */
+char *ar_conv_dos_to_utf8(const char *astr)
+{
+    char *str, *out;
+    const char *in;
+    size_t size;
+
+    size = 0;
+    for (in = astr; *in; in++) {
+        char buf[4];
+        size += ar_conv_rune_to_utf8(gCp437[(uint8_t)*in], buf, sizeof(buf));
+    }
+
+    if (size == (size_t)-1)
+        return NULL;
+    str = malloc(size + 1);
+    if (!str)
+        return NULL;
+
+    for (in = astr, out = str; *in; in++) {
+        out += ar_conv_rune_to_utf8(gCp437[(uint8_t)*in], out, str + size - out);
+    }
+    *out = '\0';
+
+    return str;
+}
+/* Unpacks a 32-bit DOS date/time (2-second resolution, years counted from 1980) and returns it in 100 ns units offset by 11644473600 s. */
+time64_t ar_conv_dosdate_to_filetime(uint32_t dosdate)
+{
+    struct tm tm;
+    time_t t1, t2;
+
+    tm.tm_sec = (dosdate & 0x1F) * 2;
+    tm.tm_min = (dosdate >> 5) & 0x3F;
+    tm.tm_hour = (dosdate >> 11) & 0x1F;
+    tm.tm_mday = (dosdate >> 16) & 0x1F;
+    tm.tm_mon = ((dosdate >> 21) & 0x0F) - 1;
+    tm.tm_year = ((dosdate >> 25) & 0x7F) + 80;
+    tm.tm_isdst = -1;
+
+    t1 = mktime(&tm);
+    t2 = mktime(gmtime(&t1));
+    /* t1 - t2 is the local UTC offset, so 2 * t1 - t2 effectively reads the DOS fields as UTC. */
+    return (time64_t)(2 * t1 - t2 + 11644473600) * 10000000;
+}
diff --git a/cut-n-paste/unarr/common/crc32.c b/cut-n-paste/unarr/common/crc32.c
new file mode 100644
index 00000000..b482e6e3
--- /dev/null
+++ b/cut-n-paste/unarr/common/crc32.c
@@ -0,0 +1,51 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#include "unarr-imp.h"
+
+#ifndef HAVE_ZLIB
+
+/* code adapted from https://gnunet.org/svn/gnunet/src/util/crypto_crc.c (public domain) */
+
+static bool crc_table_ready = false;
+static uint32_t crc_table[256];
+/* Folds `data_len` bytes of `data` into the running checksum `crc32` (pass 0 to start); the table is built on first call, without locking. */
+uint32_t ar_crc32(uint32_t crc32, const unsigned char *data, size_t data_len)
+{
+    if (!crc_table_ready) {
+        uint32_t i, j;
+        uint32_t h = 1;
+        crc_table[0] = 0;
+        for (i = 128; i; i >>= 1) {
+            h = (h >> 1) ^ ((h & 1) ? 0xEDB88320 : 0);
+            for (j = 0; j < 256; j += 2 * i) {
+                crc_table[i + j] = crc_table[j] ^ h;
+            }
+        }
+        crc_table_ready = true;
+    }
+
+    crc32 = crc32 ^ 0xFFFFFFFF;
+    while (data_len-- > 0) {
+        crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ *data++) & 0xFF];
+    }
+    return crc32 ^ 0xFFFFFFFF;
+}
+
+#else
+
+#include <zlib.h>
+/* zlib-backed variant: crc32() is handed at most UINT32_MAX bytes per call, so larger inputs are fed in pieces. */
+uint32_t ar_crc32(uint32_t crc, const unsigned char *data, size_t data_len)
+{
+#if SIZE_MAX > UINT32_MAX
+    while (data_len > UINT32_MAX) {
+        crc = crc32(crc, data, UINT32_MAX);
+        data += UINT32_MAX;
+        data_len -= UINT32_MAX;
+    }
+#endif
+    return crc32(crc, data, (uint32_t)data_len);
+}
+
+#endif
diff --git a/cut-n-paste/unarr/common/stream.c b/cut-n-paste/unarr/common/stream.c
new file mode 100644
index 00000000..64fe19b3
--- /dev/null
+++ b/cut-n-paste/unarr/common/stream.c
@@ -0,0 +1,217 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#include "unarr-imp.h"
+/* Wraps `data` and its four callbacks in a heap-allocated ar_stream; if that allocation fails, `data` is closed and NULL is returned. */
+ar_stream *ar_open_stream(void *data, ar_stream_close_fn close, ar_stream_read_fn read, ar_stream_seek_fn seek, ar_stream_tell_fn tell)
+{
+    ar_stream *stream = malloc(sizeof(ar_stream));
+    if (!stream) {
+        close(data);
+        return NULL;
+    }
+    stream->data = data;
+    stream->close = close;
+    stream->read = read;
+    stream->seek = seek;
+    stream->tell = tell;
+    return stream;
+}
+/* Runs the stream's close callback on its data and frees the stream; NULL is accepted. */
+void ar_close(ar_stream *stream)
+{
+    if (stream)
+        stream->close(stream->data);
+    free(stream);
+}
+
+size_t ar_read(ar_stream *stream, void *buffer, size_t count)
+{
+    return stream->read(stream->data, buffer, count);
+}
+
+bool ar_seek(ar_stream *stream, off64_t offset, int origin)
+{
+    return stream->seek(stream->data, offset, origin);
+}
+
+bool ar_skip(ar_stream *stream, off64_t count)
+{
+    return stream->seek(stream->data, count, SEEK_CUR);
+}
+
+off64_t ar_tell(ar_stream *stream)
+{
+    return stream->tell(stream->data);
+}
+
+/***** stream based on FILE *****/
+
+static void file_close(void *data)
+{
+    fclose(data);
+}
+
+static size_t file_read(void *data, void *buffer, size_t count)
+{
+    return fread(buffer, 1, count, data);
+}
+/* Seeks with the widest API available; the plain fseek() fallback rejects offsets outside the int32 range. */
+static bool file_seek(void *data, off64_t offset, int origin)
+{
+#ifdef _MSC_VER
+    return _fseeki64(data, offset, origin) == 0;
+#else
+#if _POSIX_C_SOURCE >= 200112L
+    if (sizeof(off_t) == 8)
+        return fseeko(data, offset, origin) == 0;
+#endif
+    if (offset > INT32_MAX || offset < INT32_MIN)
+        return false;
+    return fseek(data, (long)offset, origin) == 0;
+#endif
+}
+
+static off64_t file_tell(void *data)
+{
+#ifdef _MSC_VER
+    return _ftelli64(data);
+#elif _POSIX_C_SOURCE >= 200112L
+    return ftello(data);
+#else
+    return ftell(data);
+#endif
+}
+/* Opens `path` for binary reading; returns NULL if `path` is NULL or fopen() fails. */
+ar_stream *ar_open_file(const char *path)
+{
+    FILE *f = path ? fopen(path, "rb") : NULL;
+    if (!f)
+        return NULL;
+    return ar_open_stream(f, file_close, file_read, file_seek, file_tell);
+}
+
+#ifdef _WIN32
+ar_stream *ar_open_file_w(const wchar_t *path)
+{
+    FILE *f = path ? _wfopen(path, L"rb") : NULL;
+    if (!f)
+        return NULL;
+    return ar_open_stream(f, file_close, file_read, file_seek, file_tell);
+}
+#endif
+
+/***** stream based on preallocated memory *****/
+
+struct MemoryStream {
+    const uint8_t *data;
+    size_t length;
+    size_t offset;
+};
+
+static void memory_close(void *data)
+{
+    struct MemoryStream *stm = data;
+    free(stm);
+}
+/* Copies up to `count` bytes from the current offset, clamped to the bytes remaining, and advances the offset. */
+static size_t memory_read(void *data, void *buffer, size_t count)
+{
+    struct MemoryStream *stm = data;
+    if (count > stm->length - stm->offset)
+        count = stm->length - stm->offset;
+    memcpy(buffer, stm->data + stm->offset, count);
+    stm->offset += count;
+    return count;
+}
+/* Resolves `offset` against `origin` and rejects targets before the start or past the end of the buffer. */
+static bool memory_seek(void *data, off64_t offset, int origin)
+{
+    struct MemoryStream *stm = data;
+    if (origin == SEEK_CUR)
+        offset += stm->offset;
+    else if (origin == SEEK_END)
+        offset += stm->length;
+    if (offset < 0 || offset > (off64_t)stm->length || (size_t)offset > stm->length)
+        return false;
+    stm->offset = (size_t)offset;
+    return true;
+}
+
+static off64_t memory_tell(void *data)
+{
+    struct MemoryStream *stm = data;
+    return stm->offset;
+}
+/* Creates a stream over caller-owned memory: `data` is neither copied nor freed here, so it has to outlive the stream. */
+ar_stream *ar_open_memory(const void *data, size_t datalen)
+{
+    struct MemoryStream *stm = malloc(sizeof(struct MemoryStream));
+    if (!stm)
+        return NULL;
+    stm->data = data;
+    stm->length = datalen;
+    stm->offset = 0;
+    return ar_open_stream(stm, memory_close, memory_read, memory_seek, memory_tell);
+}
+
+#ifdef _WIN32
+/***** stream based on IStream *****/
+
+#define COBJMACROS
+#include <unknwn.h>
+
+static void stream_close(void *data)
+{
+    IUnknown_Release((IStream *)data);
+}
+/* Reads `count` bytes through IStream_Read, in ULONG_MAX-sized pieces on Win64, and returns the number of bytes read. */
+static size_t stream_read(void *data, void *buffer, size_t count)
+{
+    size_t read = 0;
+    HRESULT res;
+    ULONG cbRead;
+#ifdef _WIN64
+    while (count > ULONG_MAX) {
+        res = IStream_Read((IStream *)data, buffer, ULONG_MAX, &cbRead);
+        if (FAILED(res))
+            return read;
+        read += cbRead;
+        buffer = (BYTE *)buffer + ULONG_MAX;
+        count -= ULONG_MAX;
+    }
+#endif
+    res = IStream_Read((IStream *)data, buffer, (ULONG)count, &cbRead);
+    if (SUCCEEDED(res))
+        read += cbRead;
+    return read;
+}
+
+static bool stream_seek(void *data, off64_t offset, int origin)
+{
+    LARGE_INTEGER off;
+    ULARGE_INTEGER n;
+    HRESULT res;
+    off.QuadPart = offset;
+    res = IStream_Seek((IStream *)data, off, origin, &n);
+    return SUCCEEDED(res);
+}
+
+static off64_t stream_tell(void *data)
+{
+    LARGE_INTEGER zero = { 0 };
+    ULARGE_INTEGER n = { 0 };
+    IStream_Seek((IStream *)data, zero, SEEK_CUR, &n);
+    return (off64_t)n.QuadPart;
+}
+/* Rewinds `stream` to its start and takes a reference that stream_close() later releases; NULL if the seek fails. */
+ar_stream *ar_open_istream(IStream *stream)
+{
+    LARGE_INTEGER zero = { 0 };
+    HRESULT res = IStream_Seek(stream, zero, STREAM_SEEK_SET, NULL);
+    if (FAILED(res))
+        return NULL;
+    IUnknown_AddRef(stream);
+    return ar_open_stream(stream, stream_close, stream_read, stream_seek, stream_tell);
+}
+#endif
diff --git a/cut-n-paste/unarr/common/unarr-imp.h b/cut-n-paste/unarr/common/unarr-imp.h
new file mode 100644
index 00000000..90ad3178
--- /dev/null
+++ b/cut-n-paste/unarr/common/unarr-imp.h
@@ -0,0 +1,81 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+/* this is the common private/implementation API of unarr which should only be used by unarr code */
+
+#ifndef common_unarr_imp_h
+#define common_unarr_imp_h
+
+#include "../unarr.h"
+#include "allocator.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+/***** conv ****/
+
+size_t ar_conv_rune_to_utf8(wchar_t rune, char *out, size_t size);
+char *ar_conv_dos_to_utf8(const char *astr);
+time64_t ar_conv_dosdate_to_filetime(uint32_t dosdate);
+
+/***** crc32 *****/
+
+uint32_t ar_crc32(uint32_t crc32, const unsigned char *data, size_t data_len);
+
+/***** stream *****/
+/* Callback signatures a stream backend supplies; `data` is the backend's own state pointer. */
+typedef void (* ar_stream_close_fn)(void *data);
+typedef size_t (* ar_stream_read_fn)(void *data, void *buffer, size_t count);
+typedef bool (* ar_stream_seek_fn)(void *data, off64_t offset, int origin);
+typedef off64_t (* ar_stream_tell_fn)(void *data);
+
+struct ar_stream_s {
+    ar_stream_close_fn close;
+    ar_stream_read_fn read;
+    ar_stream_seek_fn seek;
+    ar_stream_tell_fn tell;
+    void *data;
+};
+
+ar_stream *ar_open_stream(void *data, ar_stream_close_fn close, ar_stream_read_fn read, ar_stream_seek_fn seek, ar_stream_tell_fn tell);
+
+/***** unarr *****/
+/* warn() always logs; log() expands to a no-op when NDEBUG is defined. */
+#define warn(...) ar_log("!", __FILE__, __LINE__, __VA_ARGS__)
+#ifndef NDEBUG
+#define log(...) ar_log("-", __FILE__, __LINE__, __VA_ARGS__)
+#else
+#define log(...) ((void)0)
+#endif
+void ar_log(const char *prefix, const char *file, int line, const char *msg, ...);
+
+typedef void (* ar_archive_close_fn)(ar_archive *ar);
+typedef bool (* ar_parse_entry_fn)(ar_archive *ar, off64_t offset);
+typedef const char *(* ar_entry_get_name_fn)(ar_archive *ar);
+typedef bool (* ar_entry_uncompress_fn)(ar_archive *ar, void *buffer, size_t count);
+typedef size_t (* ar_get_global_comment_fn)(ar_archive *ar, void *buffer, size_t count);
+/* Format callbacks followed by shared per-entry state; format-specific structs presumably start with this (ar_open_archive takes a struct_size). */
+struct ar_archive_s {
+    ar_archive_close_fn close;
+    ar_parse_entry_fn parse_entry;
+    ar_entry_get_name_fn get_name;
+    ar_entry_uncompress_fn uncompress;
+    ar_get_global_comment_fn get_comment;
+
+    ar_stream *stream;
+    bool at_eof;
+    off64_t entry_offset;
+    off64_t entry_offset_first;
+    off64_t entry_offset_next;
+    size_t entry_size_uncompressed;
+    time64_t entry_filetime;
+};
+
+ar_archive *ar_open_archive(ar_stream *stream, size_t struct_size, ar_archive_close_fn close, ar_parse_entry_fn parse_entry,
+                            ar_entry_get_name_fn get_name, ar_entry_uncompress_fn uncompress, ar_get_global_comment_fn get_comment,
+                            off64_t first_entry_offset);
+
+#endif
diff --git a/cut-n-paste/unarr/common/unarr.c b/cut-n-paste/unarr/common/unarr.c
new file mode 100644
index 00000000..49c6d981
--- /dev/null
+++ b/cut-n-paste/unarr/common/unarr.c
@@ -0,0 +1,110 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#include "unarr-imp.h"
+/* Allocates a zeroed archive of `struct_size` bytes, stores the callbacks and `stream`, and sets both the first and the next entry offset to `first_entry_offset`; NULL if allocation fails. */
+ar_archive *ar_open_archive(ar_stream *stream, size_t struct_size, ar_archive_close_fn close, ar_parse_entry_fn parse_entry,
+                            ar_entry_get_name_fn get_name, ar_entry_uncompress_fn uncompress, ar_get_global_comment_fn get_comment,
+                            off64_t first_entry_offset)
+{
+    ar_archive *ar = malloc(struct_size);
+    if (!ar)
+        return NULL;
+    memset(ar, 0, struct_size);
+    ar->close = close;
+    ar->parse_entry = parse_entry;
+    ar->get_name = get_name;
+    ar->uncompress = uncompress;
+    ar->get_comment = get_comment;
+    ar->stream = stream;
+    ar->entry_offset_first = first_entry_offset;
+    ar->entry_offset_next = first_entry_offset;
+    return ar;
+}
+/* Invokes the format's close callback and frees the archive; NULL is accepted. This function itself does not touch ar->stream. */
+void ar_close_archive(ar_archive *ar)
+{
+    if (ar)
+        ar->close(ar);
+    free(ar);
+}
+
+bool ar_at_eof(ar_archive *ar)
+{
+    return ar->at_eof;
+}
+
+bool ar_parse_entry(ar_archive *ar)
+{
+    return ar->parse_entry(ar, ar->entry_offset_next);
+}
+/* Clears the EOF flag and parses the entry at `offset`; an offset of 0 selects the first entry. */
+bool ar_parse_entry_at(ar_archive *ar, off64_t offset)
+{
+    ar->at_eof = false;
+    return ar->parse_entry(ar, offset ? offset : ar->entry_offset_first);
+}
+/* Walks the entries from the first one and returns true as soon as one named exactly `entry_name` has been parsed. */
+bool ar_parse_entry_for(ar_archive *ar, const char *entry_name)
+{
+    ar->at_eof = false;
+    if (!entry_name)
+        return false;
+    if (!ar_parse_entry_at(ar, ar->entry_offset_first))
+        return false;
+    do {
+        const char *name = ar_entry_get_name(ar);
+        if (name && strcmp(name, entry_name) == 0)
+            return true;
+    } while (ar_parse_entry(ar));
+    return false;
+}
+
+const char *ar_entry_get_name(ar_archive *ar)
+{
+    return ar->get_name(ar);
+}
+
+off64_t ar_entry_get_offset(ar_archive *ar)
+{
+    return ar->entry_offset;
+}
+
+size_t ar_entry_get_size(ar_archive *ar)
+{
+    return ar->entry_size_uncompressed;
+}
+
+time64_t ar_entry_get_filetime(ar_archive *ar)
+{
+    return ar->entry_filetime;
+}
+
+bool ar_entry_uncompress(ar_archive *ar, void *buffer, size_t count)
+{
+    return ar->uncompress(ar, buffer, count);
+}
+/* Returns 0 when the format provides no get_comment callback. */
+size_t ar_get_global_comment(ar_archive *ar, void *buffer, size_t count)
+{
+    if (!ar->get_comment)
+        return 0;
+    return ar->get_comment(ar, buffer, count);
+}
+/* Prints "prefix file:line: message" plus a newline to stderr, with `file` reduced to its base name; `msg` is a printf-style format. */
+__attribute__((__format__ (__printf__, 4, 0)))
+void ar_log(const char *prefix, const char *file, int line, const char *msg, ...)
+{
+    va_list args;
+    va_start(args, msg);
+    if (prefix)
+        fprintf(stderr, "%s ", prefix);
+    if (strrchr(file, '/'))
+        file = strrchr(file, '/') + 1;
+    if (strrchr(file, '\\'))
+        file = strrchr(file, '\\') + 1;
+    fprintf(stderr, "%s:%d: ", file, line);
+    vfprintf(stderr, msg, args);
+    fprintf(stderr, "\n");
+    va_end(args);
+}
diff --git a/cut-n-paste/unarr/lzmasdk/7zTypes.h b/cut-n-paste/unarr/lzmasdk/7zTypes.h
new file mode 100644
index 00000000..fe4fde3f
--- /dev/null
+++ b/cut-n-paste/unarr/lzmasdk/7zTypes.h
@@ -0,0 +1,525 @@
+/* 7zTypes.h -- Basic types
+2021-12-25 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include */
+#else
+#include <errno.h>
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _MSC_VER
+  #if _MSC_VER > 1200
+    #define MY_ALIGN(n) __declspec(align(n))
+  #else
+    #define MY_ALIGN(n)
+  #endif
+#else
+  #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+
+#else // _WIN32
+
+// #define ENV_HAVE_LSTAT
+typedef int WRes;
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY__FACILITY_ERRNO 0x800
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. 
+#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits +typedef long INT_PTR; +typedef unsigned long UINT_PTR; + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + 
/* Selects the Int64 / UInt64 typedefs and the UINT64_CONST literal helper. */
#ifdef _SZ_NO_INT_64

/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
   NOTES: Some code will work incorrectly in that case! */

/* Fallback: Int64 / UInt64 are plain long / unsigned long.
   NOTE(review): UINT64_CONST is not defined in this branch. */
typedef long Int64;
typedef unsigned long UInt64;

#else

#if defined(_MSC_VER) || defined(__BORLANDC__)
/* MSVC / Borland: compiler-specific __int64; UINT64_CONST leaves the literal unchanged. */
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n
#else
/* Everything else: long long; UINT64_CONST pastes the ULL suffix onto the literal. */
typedef long long int Int64;
typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL
#endif

#endif
/* IByteIn: interface for reading a stream one byte at a time.
   The Read callback receives the interface pointer as its only argument. */
typedef struct IByteIn IByteIn;
struct IByteIn
{
  Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
};
/* Expands to p->Read(p); the argument p appears twice in the expansion. */
#define IByteIn_Read(p) (p)->Read(p)
/* ISeekInStream: input stream interface offering Read and Seek callbacks.
   Both callbacks receive the interface pointer as their first argument. */
typedef struct ISeekInStream ISeekInStream;
struct ISeekInStream
{
  SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
  /* origin is one of the ESzSeek values; pos is passed by pointer.
     NOTE(review): presumably *pos receives the resulting position - confirm against an implementation. */
  SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
};
/* Call-through helpers; the argument p appears twice in each expansion. */
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
/* CLookToRead2: object that embeds an ILookInStream vtable (vt) and keeps a
   pointer to an underlying ISeekInStream (realStream) together with a
   caller-provided buffer. */
typedef struct
{
  ILookInStream vt;
  const ISeekInStream *realStream;

  size_t pos;
  size_t size; /* it's data size */

  /* the following variables must be set outside */
  Byte *buf;
  size_t bufSize;
} CLookToRead2;

/* Fills p->vt. NOTE(review): the meaning of lookahead is defined by the
   implementation, which is not part of this header. */
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);

/* Resets both pos and size to 0; buf and bufSize are left untouched. */
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
/* ISzAlloc: allocator interface made of an Alloc and a Free callback.
   ISzAllocPtr is the const pointer type through which it is passed around. */
typedef struct ISzAlloc ISzAlloc;
typedef const ISzAlloc * ISzAllocPtr;

struct ISzAlloc
{
  void *(*Alloc)(ISzAllocPtr p, size_t size);
  void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
};

/* Call-through helpers; the argument p appears twice in each expansion. */
#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
#define ISzAlloc_Free(p, a) (p)->Free(p, a)

/* deprecated */
/* Old names kept as aliases of the ISzAlloc_* helpers. */
#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
/* Plain cast of ptr to (type *). The member name m is ignored, so the result
   is the containing object only when m is located at offset 0 of type. */
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))

/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
*/
/* Converts a pointer to member m into a pointer to the enclosing type
   by delegating to MY_container_of. */
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)

/* The _CLS variant uses the plain-cast form; the offset-based alternative
   is kept below as a comment. */
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
*/
/* Stand-in for __cpuidex() on old MSVC (this branch is compiled when
   _MSC_VER < 1600). It forwards to __cpuid(CPUInfo, function) and never
   reads subFunction in C code. subFunction is the first MY_FAST_CALL
   parameter so that its value is in ECX when the cpuid instruction runs;
   old MSVC's __cpuid() does not clear ECX.
   Do not remove MY_NO_INLINE or MY_FAST_CALL: the hack relies on both. */
static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
  UNUSED_VAR(subFunction); /* only silences the unused-parameter warning */
  __cpuid(CPUInfo, function);
}
cpuid WAS USED ========") + #endif +#endif + + + + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) +{ + #ifdef USE_ASM + + #ifdef _MSC_VER + + UInt32 a2, b2, c2, d2; + __asm xor EBX, EBX; + __asm xor ECX, ECX; + __asm xor EDX, EDX; + __asm mov EAX, function; + __asm cpuid; + __asm mov a2, EAX; + __asm mov b2, EBX; + __asm mov c2, ECX; + __asm mov d2, EDX; + + *a = a2; + *b = b2; + *c = c2; + *d = d2; + + #else + + __asm__ __volatile__ ( + #if defined(MY_CPU_AMD64) && defined(__PIC__) + "mov %%rbx, %%rdi;" + "cpuid;" + "xchg %%rbx, %%rdi;" + : "=a" (*a) , + "=D" (*b) , + #elif defined(MY_CPU_X86) && defined(__PIC__) + "mov %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=a" (*a) , + "=D" (*b) , + #else + "cpuid" + : "=a" (*a) , + "=b" (*b) , + #endif + "=c" (*c) , + "=d" (*d) + : "0" (function), "c"(0) ) ; + + #endif + + #else + + int CPUInfo[4]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; + + #endif +} + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +{ + CHECK_CPUID_IS_SUPPORTED + MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); + MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + return True; +} + +static const UInt32 kVendors[][3] = +{ + { 0x756E6547, 0x49656E69, 0x6C65746E}, + { 0x68747541, 0x69746E65, 0x444D4163}, + { 0x746E6543, 0x48727561, 0x736C7561} +}; + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] && + v[1] == p->vendor[1] && + v[2] == p->vendor[2]) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder(void) +{ + Cx86cpuid p; + int firm; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_GetFamily(p.ver); + model = x86cpuid_GetModel(p.ver); + + firm = x86cpuid_GetFirm(&p); + + switch (firm) + { + case 
CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + /* In-Order Atom CPU */ + model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ + || model == 0x26 /* 45 nm, Z6xx */ + || model == 0x27 /* 32 nm, Z2460 */ + || model == 0x35 /* 32 nm, Z2760 */ + || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return True; +} + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) +#include +static BoolInt CPU_Sys_Is_SSE_Supported(void) +{ + OSVERSIONINFO vi; + vi.dwOSVersionInfoSize = sizeof(vi); + if (!GetVersionEx(&vi)) + return False; + return (vi.dwMajorVersion >= 5); +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +static UInt32 X86_CPUID_ECX_Get_Flags(void) +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return False; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_AVX2(void) +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // 
printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + +BoolInt CPU_IsSupported_VAES_AVX2(void) +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB(void) +{ + Cx86cpuid cpuid; + if (!x86cpuid_CheckAndRead(&cpuid)) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + if (d[0] < 0x80000001) + return False; + } + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + return (d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 
1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + +BoolInt CPU_IsSupported_CRC32(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 
#ifdef __APPLE__

/* The header name of the #include directive was missing, which is not a
   valid directive. sysctlbyname() is declared in <sys/sysctl.h>;
   <errno.h> is included explicitly because EFAULT is used below. */
#include <errno.h>
#include <sys/sysctl.h>

/* Thin wrapper around sysctlbyname() for read-only queries.
   name    - sysctl name to query
   buf     - destination buffer
   bufSize - in: capacity of buf; out: updated by sysctlbyname()
   Returns the sysctlbyname() result unchanged. */
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  return sysctlbyname(name, buf, bufSize, NULL, 0);
}

/* Reads a sysctl value that must be exactly sizeof(UInt32) bytes long.
   Returns the My_sysctlbyname_Get() result, or EFAULT when the call
   succeeded but reported a different size. */
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t bufSize = sizeof(*val);
  int res = My_sysctlbyname_Get(name, val, &bufSize);
  if (res == 0 && bufSize != sizeof(*val))
    return EFAULT;
  return res;
}

#endif
/* x86-64 detection from MSVC-style (_M_*) and GCC-style predefined macros.
   Defines MY_CPU_AMD64 and MY_CPU_64BIT. When __ILP32__ is also defined the
   target is named "x32" and the pointer size is recorded as 4 bytes;
   otherwise it is named "x64" with 8-byte pointers. */
#if  defined(_M_X64) \
  || defined(_M_AMD64) \
  || defined(__x86_64__) \
  || defined(__AMD64__) \
  || defined(__amd64__)
  #define MY_CPU_AMD64
  #ifdef __ILP32__
    #define MY_CPU_NAME "x32"
    #define MY_CPU_SIZEOF_POINTER 4
  #else
    #define MY_CPU_NAME "x64"
    #define MY_CPU_SIZEOF_POINTER 8
  #endif
  #define MY_CPU_64BIT
#endif
/* Little-endian detection: set for x86/x64, for the *_LE architecture
   macros defined earlier in this header, for a list of compiler-provided
   little-endian macros, or when __BYTE_ORDER__ says little endian. */
#if defined(MY_CPU_X86_OR_AMD64) \
    || defined(MY_CPU_ARM_LE) \
    || defined(MY_CPU_ARM64_LE) \
    || defined(MY_CPU_IA64_LE) \
    || defined(__LITTLE_ENDIAN__) \
    || defined(__ARMEL__) \
    || defined(__THUMBEL__) \
    || defined(__AARCH64EL__) \
    || defined(__MIPSEL__) \
    || defined(__MIPSEL) \
    || defined(_MIPSEL) \
    || defined(__BFIN__) \
    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
  #define MY_CPU_LE
#endif

/* Big-endian detection: compiler-provided big-endian macros, a few
   architectures listed by name, or __BYTE_ORDER__ saying big endian. */
#if defined(__BIG_ENDIAN__) \
    || defined(__ARMEB__) \
    || defined(__THUMBEB__) \
    || defined(__AARCH64EB__) \
    || defined(__MIPSEB__) \
    || defined(__MIPSEB) \
    || defined(_MIPSEB) \
    || defined(__m68k__) \
    || defined(__s390__) \
    || defined(__s390x__) \
    || defined(__zarch__) \
    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
  #define MY_CPU_BE
#endif


/* Both tests succeeding at once means the detection is inconsistent:
   stop the build instead of guessing. */
#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
  #error Stop_Compiling_Bad_Endian
#endif
/* MY_CPU_pragma_pack_push_1 / MY_CPU_pragma_pop: bracket a group of
   declarations with the compiler's pack pragma.
   - MSVC with _MSC_VER >= 1300: __pragma(pack(push, 1)) / __pragma(pack(pop))
   - MSVC with a lower _MSC_VER: both macros expand to nothing
   - xlC: _Pragma("pack(1)") / _Pragma("pack()")
   - any other compiler: _Pragma("pack(push, 1)") / _Pragma("pack(pop)") */
#ifdef _MSC_VER
  #if _MSC_VER >= 1300
    #define MY_CPU_pragma_pack_push_1   __pragma(pack(push, 1))
    #define MY_CPU_pragma_pop           __pragma(pack(pop))
  #else
    #define MY_CPU_pragma_pack_push_1
    #define MY_CPU_pragma_pop
  #endif
#else
  #ifdef __xlC__
    #define MY_CPU_pragma_pack_push_1   _Pragma("pack(1)")
    #define MY_CPU_pragma_pop           _Pragma("pack()")
  #else
    #define MY_CPU_pragma_pack_push_1   _Pragma("pack(push, 1)")
    #define MY_CPU_pragma_pop           _Pragma("pack(pop)")
  #endif
#endif
/* Fallback 64-bit accessors, used when MY_CPU_LE_UNALIGN_64 is not defined.
   They are built from the 32-bit accessors: the low 32 bits live at p and
   the high 32 bits at p + 4. */
#ifndef MY_CPU_LE_UNALIGN_64

/* Note: p is expanded twice. */
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))

/* p and v are each evaluated once into the locals _ppp2_ / _vvv2_.
   The expansion is a brace block, not an expression. */
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }

#endif
/* Field extractors for the version word stored in Cx86cpuid.ver
   (EAX of CPUID function 1).
   The argument is parenthesized at every use so that an expression argument
   such as (a | b) is shifted and masked as a whole; without the parentheses
   operator precedence would apply the shift or mask to the last operand only.
     GetFamily   : bits 20..27 placed above bits 8..11  -> (bits20_27 << 4) | bits8_11
     GetModel    : bits 16..19 placed above bits 4..7   -> (bits16_19 << 4) | bits4_7
     GetStepping : bits 0..3 */
#define x86cpuid_GetFamily(ver) ((((ver) >> 16) & 0xFF0) | (((ver) >> 8) & 0xF))
#define x86cpuid_GetModel(ver)  ((((ver) >> 12) & 0xF0) | (((ver) >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) ((ver) & 0xF)
#if defined(__APPLE__)
/* Apple-only sysctl helpers implemented in CpuArch.c. */

/* Calls sysctlbyname(name, buf, bufSize, NULL, 0) and returns its result. */
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);

/* Reads a value that must be exactly sizeof(UInt32) bytes; returns EFAULT
   when the call succeeds but reports a different size. */
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
/* Fixed-point layout of binary-context probabilities:
   PPMD_BIN_SCALE = 1 << (7 + 7) = 16384. */
#define PPMD_INT_BITS 7
#define PPMD_PERIOD_BITS 7
#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))

/* PPMD_GET_MEAN_SPEC: (summ + 2^(shift - round)) >> shift.
   PPMD_GET_MEAN(summ) therefore equals (summ + 32) >> 7. */
#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
/* Probability updates: _0 adds (1 << PPMD_INT_BITS) and subtracts the mean,
   _1 only subtracts the mean. Note that prob is expanded twice. */
#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))

/* Index-group sizes: N1 = N2 = N3 = 4, N4 = (128 + 3 - 4 - 8 - 12) / 4 = 26,
   so PPMD_NUM_INDEXES = 38. */
#define PPMD_N1 4
#define PPMD_N2 4
#define PPMD_N3 4
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
+ + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. +*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. 
+*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif + + +#define PPMD_SetAllBitsIn256Bytes(p) \ + { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ + p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }} + +EXTERN_C_END + +#endif diff --git a/cut-n-paste/unarr/lzmasdk/Ppmd7.c b/cut-n-paste/unarr/lzmasdk/Ppmd7.c new file mode 100644 index 00000000..cf401cb3 --- /dev/null +++ b/cut-n-paste/unarr/lzmasdk/Ppmd7.c @@ -0,0 +1,1104 @@ +/* Ppmd7.c -- PPMdH codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd7.h" + +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. 
code */
+// #define PPMD7_ORDER_0_SUPPPORT
+
+MY_ALIGN(16)
+static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
+static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
+
+#define MAX_FREQ 124
+#define UNIT_SIZE 12
+
+#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) /* unit count -> byte count */
+#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) /* unit count (1-based) -> size-class index */
+#define I2U(indx) ((unsigned)p->Indx2Units[indx]) /* size-class index -> unit count */
+#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
+
+#define REF(ptr) Ppmd_GetRef(p, ptr)
+
+#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define STATS(ctx) Ppmd7_GetStats(p, ctx)
+#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+
+typedef CPpmd7_Context * CTX_PTR;
+
+struct CPpmd7_Node_;
+
+typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
+
+typedef struct CPpmd7_Node_
+{
+  UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
+  UInt16 NU;
+  CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
+  CPpmd7_Node_Ref Prev;
+} CPpmd7_Node;
+
+#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
+/* Ppmd7_Construct: clears Base and fills the Units2Indx / Indx2Units, NS2BSIndx, NS2Indx and ExpEscape lookup tables. */
+void Ppmd7_Construct(CPpmd7 *p)
+{
+  unsigned i, k, m;
+
+  p->Base = NULL;
+
+  for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
+  {
+    unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); /* size classes grow by 1, 2, 3, then 4 units */
+    do { p->Units2Indx[k++] = (Byte)i; } while (--step);
+    p->Indx2Units[i] = (Byte)k; /* largest unit count served by class i */
+  }
+
+  p->NS2BSIndx[0] = (0 << 1);
+  p->NS2BSIndx[1] = (1 << 1);
+  memset(p->NS2BSIndx + 2, (2 << 1), 9); /* entries 2..10 */
+  memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); /* entries 11..255 */
+
+  for (i = 0; i < 3; i++)
+    p->NS2Indx[i] = (Byte)i;
+
+  for (m = i, k = 1; i < 256; i++) /* each next value m is repeated for a run one entry longer than the previous */
+  {
+    p->NS2Indx[i] = (Byte)m;
+    if (--k == 0)
+      k = (++m) - 2;
+  }
+
+  memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
+}
+
+/* Ppmd7_Free: releases Base through alloc and resets Size and Base. */
+void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->Base);
+  p->Size = 0;
+  p->Base = NULL;
+}
+
+/* Ppmd7_Alloc: (re)allocates Base only if there is none or Size differs from size; returns False when the allocator returns NULL. */
+BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
+{
+  if (!p->Base || p->Size != size)
+  {
+    Ppmd7_Free(p, alloc);
+    p->AlignOffset = (4 - size) & 3; /* makes (AlignOffset + size) a multiple of 4 */
+    if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
+      return False;
+    p->Size = size;
+  }
+  return True;
+}
+
+
+
+// ---------- Internal Memory Allocator ----------
+
+/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
+   But we still need one additional list walk pass in GlueFreeBlocks().
+   So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
+*/
+
+#define EMPTY_NODE 0
+
+/* InsertNode: pushes the block at node onto the head of FreeList[indx]; the old head is stored at the start of the block. */
+static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
+{
+  *((CPpmd_Void_Ref *)node) = p->FreeList[indx];
+  // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
+
+  p->FreeList[indx] = REF(node);
+
+}
+
+/* RemoveNode: pops and returns the head block of FreeList[indx]; it does not check for an empty list itself. */
+static void *RemoveNode(CPpmd7 *p, unsigned indx)
+{
+  CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
+  p->FreeList[indx] = *node;
+  // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
+  // p->FreeList[indx] = node->Next;
+  return node;
+}
+
+/* SplitBlock: keeps the first I2U(newIndx) units at ptr and returns the remaining units to the free lists as one or two blocks. */
+static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
+{
+  unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
+  ptr = (Byte *)ptr + U2B(I2U(newIndx));
+  if (I2U(i = U2I(nu)) != nu) /* remainder is not an exact size class: split it in two */
+  {
+    unsigned k = I2U(--i);
+    InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
+  }
+  InsertNode(p, ptr, i);
+}
+
+
+/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
+
+typedef union _CPpmd7_Node_Union
+{
+  CPpmd7_Node Node;
+  CPpmd7_Node_Ref NextRef;
+} CPpmd7_Node_Union;
+
+/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
+   we use single linked list similar to Ppmd8 code */
+
+/* GlueFreeBlocks: chains all free-list blocks into one list, merges blocks adjacent in memory, refills FreeList[]; sets GlueCount = 255. */
+static void GlueFreeBlocks(CPpmd7 *p)
+{
+  /*
+    we use first UInt16 field of 12-bytes UNITs as record type stamp
+      CPpmd_State    { Byte Symbol; Byte Freq; : Freq != 0
+      CPpmd7_Context { UInt16 NumStats;        : NumStats != 0
+      CPpmd7_Node    { UInt16 Stamp            : Stamp == 0 for free record
+                                               : Stamp == 1 for head record and guard
+    The last 12-byte UNIT in the array always contains the 12-byte order-0 CPpmd7_Context record.
+  */
+  CPpmd7_Node_Ref head, n = 0;
+
+  p->GlueCount = 255;
+
+
+  /* we set guard NODE at LoUnit */
+  if (p->LoUnit != p->HiUnit)
+    ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
+
+  {
+    /* Create list of free blocks.
+       We still need one additional list walk pass before Glue. */
+    unsigned i;
+    for (i = 0; i < PPMD_NUM_INDEXES; i++)
+    {
+      const UInt16 nu = I2U_UInt16(i);
+      CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+      p->FreeList[i] = 0;
+      while (next != 0)
+      {
+        /* Don't change the order of the following commands: */
+        CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
+        const CPpmd7_Node_Ref tmp = next;
+        next = un->NextRef; /* read the old link before the Node fields overwrite it */
+        un->Node.Stamp = EMPTY_NODE;
+        un->Node.NU = nu;
+        un->Node.Next = n;
+        n = tmp;
+      }
+    }
+  }
+
+  head = n;
+  /* Glue and Fill must walk the list in same direction */
+  {
+    /* Glue free blocks */
+    CPpmd7_Node_Ref *prev = &head;
+    while (n)
+    {
+      CPpmd7_Node *node = NODE(n);
+      UInt32 nu = node->NU;
+      n = node->Next;
+      if (nu == 0) /* already merged into an earlier block: unlink it */
+      {
+        *prev = n;
+        continue;
+      }
+      prev = &node->Next;
+      for (;;)
+      {
+        CPpmd7_Node *node2 = node + nu;
+        nu += node2->NU;
+        if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) /* neighbour is in use, or NU would not fit in UInt16 */
+          break;
+        node->NU = (UInt16)nu;
+        node2->NU = 0;
+      }
+    }
+  }
+
+  /* Fill lists of free blocks */
+  for (n = head; n != 0;)
+  {
+    CPpmd7_Node *node = NODE(n);
+    UInt32 nu = node->NU;
+    unsigned i;
+    n = node->Next;
+    if (nu == 0)
+      continue;
+    for (; nu > 128; nu -= 128, node += 128)
+      InsertNode(p, node, PPMD_NUM_INDEXES - 1);
+    if (I2U(i = U2I(nu)) != nu)
+    {
+      unsigned k = I2U(--i);
+      InsertNode(p, node + k, (unsigned)nu - k - 1);
+    }
+    InsertNode(p, node, i);
+  }
+}
+
+/* AllocUnitsRare: slow path. Glue when GlueCount == 0, else split a larger free block, else carve from below UnitsStart; NULL if no room. */
+MY_NO_INLINE
+static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
+{
+  unsigned i;
+
+  if (p->GlueCount == 0)
+  {
+    GlueFreeBlocks(p);
+    if (p->FreeList[indx] != 0)
+      return RemoveNode(p, indx);
+  }
+
+  i = indx;
+
+  do
+  {
+    if (++i == PPMD_NUM_INDEXES) /* no larger free block exists */
+    {
+      UInt32 numBytes = U2B(I2U(indx));
+      Byte *us = p->UnitsStart;
+      p->GlueCount--;
+      return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
+    }
+  }
+  while (p->FreeList[i] == 0);
+
+  {
+    void *block = RemoveNode(p, i);
+    SplitBlock(p, block, i, indx);
+    return block;
+  }
+}
+
+/* AllocUnits: allocates a block of size class indx from FreeList[indx], then from the LoUnit..HiUnit gap, then via AllocUnitsRare(). */
+static void *AllocUnits(CPpmd7 *p, unsigned indx)
+{
+  if (p->FreeList[indx] != 0)
+    return RemoveNode(p, indx);
+  {
+    UInt32 numBytes = U2B(I2U(indx));
+    Byte *lo = p->LoUnit;
+    if ((UInt32)(p->HiUnit - lo) >= numBytes)
+    {
+      p->LoUnit = lo + numBytes;
+      return lo;
+    }
+  }
+  return AllocUnitsRare(p, indx);
+}
+
+/* MyMem12Cpy: copies num 12-byte units as UInt32 triples; the do/while form means num must be non-zero. */
+#define MyMem12Cpy(dest, src, num) \
+  { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
+    do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
+
+/*
+static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
+{
+  unsigned i0 = U2I(oldNU);
+  unsigned i1 = U2I(newNU);
+  if (i0 == i1)
+    return oldPtr;
+  if (p->FreeList[i1] != 0)
+  {
+    void *ptr = RemoveNode(p, i1);
+    MyMem12Cpy(ptr, oldPtr, newNU);
+    InsertNode(p, oldPtr, i0);
+    return ptr;
+  }
+  SplitBlock(p, oldPtr, i0, i1);
+  return oldPtr;
+}
+*/
+
+/* SUCCESSOR(p) reads the 32-bit successor reference of a state; SetSuccessor() is a function wrapper over Ppmd_SET_SUCCESSOR. */
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
+{
+  Ppmd_SET_SUCCESSOR(p, v);
+}
+
+/* RestartModel: resets the model to its start state: empty free lists, Text / unit areas laid out again inside Base, */
+/* a 256-symbol order-0 root context (every Freq = 1, NULL successors) and freshly initialised BinSumm[] and See[] tables. */
+
+MY_NO_INLINE
+static
+void RestartModel(CPpmd7 *p)
+{
+  unsigned i, k;
+
+  memset(p->FreeList, 0, sizeof(p->FreeList));
+
+  p->Text = p->Base + p->AlignOffset;
+  p->HiUnit = p->Text + p->Size;
+  p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
+  p->GlueCount = 0;
+
+  p->OrderFall = p->MaxOrder;
+  p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
+  p->PrevSuccess = 0;
+
+  {
+    CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+    CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+
+    p->LoUnit += U2B(256 / 2);
+    p->MaxContext = p->MinContext = mc;
+    p->FoundState = s;
+
+    mc->NumStats = 256;
+    mc->Union2.SummFreq = 256 + 1;
+    mc->Union4.Stats = REF(s);
+    mc->Suffix = 0;
+
+    for (i = 0; i < 256; i++, s++)
+    {
+      s->Symbol = (Byte)i;
+      s->Freq = 1;
+      SetSuccessor(s, 0);
+    }
+
+    #ifdef PPMD7_ORDER_0_SUPPPORT
+    if (p->MaxOrder == 0)
+    {
+      CPpmd_Void_Ref r = REF(mc);
+      s = p->FoundState;
+      for (i = 0; i < 256; i++, s++)
+        SetSuccessor(s, r);
+      return;
+    }
+    #endif
+  }
+
+  for (i = 0; i < 128; i++)
+
+    /* BinSumm[i][k + 8 * j] = PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2), for j = 0..7 */
+
+    for (k = 0; k < 8; k++)
+    {
+      unsigned m;
+      UInt16 *dest = p->BinSumm[i] + k;
+      UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
+      for (m = 0; m < 64; m += 8)
+        dest[m] = val;
+    }
+
+
+  for (i = 0; i < 25; i++)
+  {
+
+    CPpmd_See *s = p->See[i];
+
+
+
+    unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
+    for (k = 0; k < 16; k++, s++)
+    {
+      s->Summ = (UInt16)summ;
+      s->Shift = (PPMD_PERIOD_BITS - 4);
+      s->Count = 4;
+    }
+  }
+
+  p->DummySee.Summ = 0; /* unused */
+  p->DummySee.Shift = PPMD_PERIOD_BITS;
+  p->DummySee.Count = 64; /* unused */
+}
+
+/* Ppmd7_Init: stores maxOrder and restarts the model. RestartModel() writes through Base, so Base is presumably allocated first; confirm with callers. */
+void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
+{
+  p->MaxOrder = maxOrder;
+
+  RestartModel(p);
+}
+
+
+
+/*
+  CreateSuccessors()
+  It's called when (FoundState->Successor) is RAW-Successor,
+  that is the link to position in Raw text.
+  So we create Context records and write the links to
+  FoundState->Successor and to identical RAW-Successors in suffix
+  contexts of MinContext.
+ + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +MY_NO_INLINE +static CTX_PTR CreateSuccessors(CPpmd7 *p) +{ + CTX_PTR c = p->MinContext; + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq; + unsigned numPs = 0; + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + + if (c->NumStats != 1) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + + } + else + { + s = ONE_STATE(c); + + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + // (c) is real record Context here, + c = CTX(successor); + if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. + return c; + } + break; + } + ps[numPs++] = s; + } + + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + + + if (c->NumStats == 1) + newFreq = ONE_STATE(c)->Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. 
+ max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); + } + + // Create new single-symbol contexts from low order to high order in loop + + do + { + CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (CTX_PTR)RemoveNode(p, 0); + else + { + c1 = (CTX_PTR)AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + + c1->NumStats = 1; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + + +#define SwapStates(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +MY_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor; + CTX_PTR c, mc; + unsigned s0, ns; + + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 1) + { + CPpmd_State *s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + CPpmd_State *s = STATS(c); + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) + { + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SwapStates(s); + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + + if (p->OrderFall == 0) + { + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. 
*/ + p->MaxContext = p->MinContext = CreateSuccessors(p); + if (!p->MinContext) + { + RestartModel(p); + return; + } + SetSuccessor(p->FoundState, REF(p->MinContext)); + return; + } + + + /* NON-MAX ORDER context */ + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RestartModel(p); + return; + } + maxSuccessor = REF(text); + } + + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) + { + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) + { + // minSuccessor is RAW-Successor. So we will create real contexts records: + CTX_PTR cs = CreateSuccessors(p); + if (!cs) + { + RestartModel(p); + return; + } + minSuccessor = REF(cs); + } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + + if (--p->OrderFall == 0) + { + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ + p->Text -= (p->MaxContext != p->MinContext); + } + } + else + { + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. 
+ */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); + } + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 1) + { + if ((ns1 & 1) == 0) + { + /* Expand for one UNIT */ + unsigned oldNU = ns1 >> 1; + unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + RestartModel(p); + return; + } + oldPtr = STATS(c); + MyMem12Cpy(ptr, oldPtr, oldNU); + InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + // instead of One-symbol context we create 2-symbol context + CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); + if (!s) + { + RestartModel(p); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. 
So the max(escapeFreq) is 26 here + sum = freq + p->InitEsc + (ns > 3); + } + } + + { + CPpmd_State *s = STATS(c) + ns1; + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = p->FoundState->Symbol; + c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + c = SUFFIX(c); + } + while (c != mc); +} + + + +MY_NO_INLINE +static void Rescale(CPpmd7 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while 
((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) + { + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + + do + { + escFreq >>= 1; + freq = (freq + 1) >> 1; + } + while (escFreq > 1); + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 1) >> 1; + if (n0 != n1) + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MyMem12Cpy(ptr, (const void *)stats, n1); + InsertNode(p, stats, i0); + } + else + SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) + { + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + + p->HiBitsFlag; + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - 
r); + *escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void NextContext(CPpmd7 *p) +{ + CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); +} + + +void Ppmd7_Update1(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SwapStates(s); + p->FoundState = --s; + if (freq > MAX_FREQ) + Rescale(p); + } + NextContext(p); +} + + +void Ppmd7_Update1_0(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Rescale(p); + NextContext(p); +} + + +/* +void Ppmd7_UpdateBin(CPpmd7 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); + p->PrevSuccess = 1; + p->RunLength++; + NextContext(p); +} +*/ + +void Ppmd7_Update2(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Rescale(p); + Ppmd7_UpdateModel(p); +} + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These 
addresses don't cross at any time.
+And the following conditions are true for addresses:
+  (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
+
+Raw text is BYTE-aligned.
+The data in block [ UnitsStart ... Size ] contains 12-byte aligned UNITs.
+
+Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
+The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
+The code doesn't free UNITs allocated for CPpmd7_Context records.
+
+The code calls RestartModel(), when there is no free memory for allocation.
+And RestartModel() changes the state to the original start state, with full free block.
+
+
+The code allocates UNITs in the following order:
+
+Allocation of 1 UNIT for Context record
+  - from free space (HiUnit) down to (LoUnit)
+  - from FreeList[0]
+  - AllocUnitsRare()
+
+AllocUnits() for CPpmd_State vectors:
+  - from FreeList[i]
+  - from free space (LoUnit) up to (HiUnit)
+  - AllocUnitsRare()
+
+AllocUnitsRare()
+  - if (GlueCount == 0)
+       {  Glue lists, GlueCount = 255, allocate from FreeList[i] }
+  - loop for all higher sized FreeList[...] lists
+  - from (UnitsStart - Text), GlueCount--
+  - ERROR
+
+
+Each Record with Context contains the CPpmd_State vector, where each
+CPpmd_State contains the link to Successor.
+There are 3 types of Successor:
+  1) NULL-Successor   - NULL pointer. NULL-Successor links can be stored
+                        only in 0-order Root Context Record.
+                        We use 0 value as NULL-Successor
+  2) RAW-Successor    - the link to position in raw text,
+                        that "RAW-Successor" is being created after first
+                        occurrence of new symbol for some existing context record.
+                        (RAW-Successor > 0).
+  3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
+                        that record is being created when we go via RAW-Successor again.
+
+For any successor at any time, the following conditions are true for Successor links:
+(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
+
+
+---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
+
+CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
+
+The PPMd code tries to fulfill the condition:
+  (SummFreq <= (256 * 128 = RC::kBot))
+
+We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
+So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
+If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
+SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
+Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
+max-order context.
+
+When the PPMd code still breaks the (Total <= RC::Range) condition in the range coder,
+we have two ways to resolve that problem:
+  1) we can report an error, if we want to keep compatibility with the original PPMd code that has no fix for such cases.
+  2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
+*/ diff --git a/cut-n-paste/unarr/lzmasdk/Ppmd7.h b/cut-n-paste/unarr/lzmasdk/Ppmd7.h new file mode 100644 index 00000000..d31809ae --- /dev/null +++ b/cut-n-paste/unarr/lzmasdk/Ppmd7.h @@ -0,0 +1,181 @@ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#ifndef __PPMD7_H +#define __PPMD7_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD7_MIN_ORDER 2 +#define PPMD7_MAX_ORDER 64 + +#define PPMD7_MIN_MEM_SIZE (1 << 11) +#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) + +struct CPpmd7_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd7_Context_ +{ + UInt16 NumStats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd7_Context_Ref Suffix; +} CPpmd7_Context; + +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteIn *Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOut *Stream; +} CPpmd7z_RangeEnc; + + +typedef struct +{ + CPpmd7_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[25][16]; + UInt16 BinSumm[128][64]; + // int 
LastSymbol; +} CPpmd7; + + +void Ppmd7_Construct(CPpmd7 *p); +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); +#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd7_Update1(CPpmd7 *p); +void Ppmd7_Update1_0(CPpmd7 *p); +void Ppmd7_Update2(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4)) + +#define Ppmd7_GetBinSumm(p) \ + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); + + +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + +/* ---------- Decode ---------- */ + +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) + +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() + +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ + +/* Ppmd7a_* : 
original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); + + +/* ---------- Encode ---------- */ + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); + +EXTERN_C_END + +#endif diff --git a/cut-n-paste/unarr/lzmasdk/Ppmd7Dec.c b/cut-n-paste/unarr/lzmasdk/Ppmd7Dec.c new file mode 100644 index 00000000..55d74ff9 --- /dev/null +++ b/cut-n-paste/unarr/lzmasdk/Ppmd7Dec.c @@ -0,0 +1,297 @@ +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTopValue (1 << 24) + + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + if (READ_BYTE(p) != 0) + return False; + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + + + 
R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) RangeDec_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] +// MY_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq); + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt); + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask); + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 
size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask); + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + + + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; 
+ + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + }; + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_Update(see); + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt); + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ diff --git a/cut-n-paste/unarr/lzmasdk/Precomp.h b/cut-n-paste/unarr/lzmasdk/Precomp.h new file mode 100644 index 00000000..25d8aedf --- /dev/null +++ b/cut-n-paste/unarr/lzmasdk/Precomp.h @@ -0,0 +1,10 @@ +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +/* #include "Compiler.h" */ +/* #include "7zTypes.h" */ + +#endif diff --git a/cut-n-paste/unarr/lzmasdk/README.lzmasdk b/cut-n-paste/unarr/lzmasdk/README.lzmasdk new file mode 100644 index 00000000..d6051387 --- /dev/null +++ b/cut-n-paste/unarr/lzmasdk/README.lzmasdk @@ -0,0 +1,15 @@ +To update to the latest version of the LZMA SDK: +1. 
Grab the latest version from: + https://sourceforge.net/projects/sevenzip/files/LZMA%20SDK/ + and unpack it +2. For the files in evince's copy/paste, copy the originals from the + C/ sub-directory of the SDK: + for i in *.[ch] ; do cp /tmp/lzma1803/C/$i . ; done +3. Convert the copied files to Unix format: + dos2unix *.[ch] +4. Use "git add -p" to apply only the sections that are of interest + and do not regress warning fixes (see commits a2aa919 and 5e1a0ef + for example) + +Note that you might end up with an empty commit (for example 79b5fe5) +which is fine, as long as the change is documented. diff --git a/cut-n-paste/unarr/meson.build b/cut-n-paste/unarr/meson.build new file mode 100644 index 00000000..3f148c57 --- /dev/null +++ b/cut-n-paste/unarr/meson.build @@ -0,0 +1,28 @@ +sources = files( + 'common/conv.c', + 'common/crc32.c', + 'common/stream.c', + 'common/unarr.c', + 'lzmasdk/CpuArch.c', + 'lzmasdk/Ppmd7.c', + 'lzmasdk/Ppmd7Dec.c', + 'rar/filter-rar.c', + 'rar/huffman-rar.c', + 'rar/parse-rar.c', + 'rar/rar.c', + 'rar/rarvm.c', + 'rar/uncompress-rar.c', +) + +libunarr = static_library( + 'unarr', + sources: sources, + dependencies: zlib_dep, + c_args: [ '-DHAVE_ZLIB', '-DNDEBUG' ], + gnu_symbol_visibility: 'hidden', +) + +libunarr_dep = declare_dependency( + include_directories: include_directories('.'), + link_with: libunarr, +) diff --git a/cut-n-paste/unarr/rar/filter-rar.c b/cut-n-paste/unarr/rar/filter-rar.c new file mode 100644 index 00000000..cbb2916d --- /dev/null +++ b/cut-n-paste/unarr/rar/filter-rar.c @@ -0,0 +1,704 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "rar.h" +#include "rarvm.h" + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARVirtualMachine.m */ +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR30Filter.m */ + +struct MemBitReader { + const uint8_t *bytes; + size_t length; + size_t offset; + uint64_t bits; + int available; + bool at_eof; +}; + +struct RARProgramCode { + RARProgram *prog; + uint8_t *staticdata; + uint32_t staticdatalen; + uint8_t *globalbackup; + uint32_t globalbackuplen; + uint64_t fingerprint; + uint32_t usagecount; + uint32_t oldfilterlength; + struct RARProgramCode *next; +}; + +struct RARFilter { + struct RARProgramCode *prog; + uint32_t initialregisters[8]; + uint8_t *globaldata; + uint32_t globaldatalen; + size_t blockstartpos; + uint32_t blocklength; + uint32_t filteredblockaddress; + uint32_t filteredblocklength; + struct RARFilter *next; +}; + +static bool br_fill(struct MemBitReader *br, int bits) +{ + while (br->available < bits && br->offset < br->length) { + br->bits = (br->bits << 8) | br->bytes[br->offset++]; + br->available += 8; + } + if (bits > br->available) { + br->at_eof = true; + return false; + } + return true; +} + +static inline uint32_t br_bits(struct MemBitReader *br, int bits) +{ + if (bits > br->available && (br->at_eof || !br_fill(br, bits))) + return 0; + return (uint32_t)((br->bits >> (br->available -= bits)) & (((uint64_t)1 << bits) - 1)); +} + +static inline bool br_available(struct MemBitReader *br, int bits) +{ + return !br->at_eof && (bits <= br->available || br_fill(br, bits)); +} + +static uint32_t br_next_rarvm_number(struct MemBitReader *br) +{ + uint32_t val; + switch (br_bits(br, 2)) { + case 0: + return br_bits(br, 4); + case 1: + val = br_bits(br, 8); + if (val >= 16) + return val; + return 0xFFFFFF00 | (val << 4) | br_bits(br, 4); + case 2: + return br_bits(br, 16); + default: + return br_bits(br, 32); + } +} + +static void 
bw_write32le(uint8_t *dst, uint32_t value) +{ + dst[0] = value & 0xFF; + dst[1] = (value >> 8) & 0xFF; + dst[2] = (value >> 16) & 0xFF; + dst[3] = (value >> 24) & 0xFF; +} + +static void rar_delete_program(struct RARProgramCode *prog) +{ + while (prog) { + struct RARProgramCode *next = prog->next; + RARDeleteProgram(prog->prog); + free(prog->staticdata); + free(prog->globalbackup); + free(prog); + prog = next; + } +} + +static bool rar_parse_operand(struct MemBitReader *br, uint8_t instruction, bool bytemode, uint32_t instrcount, uint8_t *addressmode, uint32_t *value) +{ + if (br_bits(br, 1)) { + *addressmode = RARRegisterAddressingMode((uint8_t)br_bits(br, 3)); + *value = 0; + } + else if (br_bits(br, 1)) { + if (br_bits(br, 1)) { + if (br_bits(br, 1)) + *addressmode = RARAbsoluteAddressingMode; + else + *addressmode = RARIndexedAbsoluteAddressingMode((uint8_t)br_bits(br, 3)); + *value = br_next_rarvm_number(br); + } + else { + *addressmode = RARRegisterIndirectAddressingMode((uint8_t)br_bits(br, 3)); + *value = 0; + } + } + else { + *addressmode = RARImmediateAddressingMode; + if (!bytemode) + *value = br_next_rarvm_number(br); + else + *value = br_bits(br, 8); + if (instrcount != (uint32_t)-1 && RARInstructionIsRelativeJump(instruction)) { + if (*value >= 256) /* absolute address */ + *value -= 256; + else { /* relative address */ + if (*value >= 136) + *value -= 264; + else if (*value >= 16) + *value -= 8; + else if (*value >= 8) + *value -= 16; + *value += instrcount; + } + } + } + return !br->at_eof; +} + +static struct RARProgramCode *rar_compile_program(const uint8_t *bytes, size_t length) +{ + struct MemBitReader br = { 0 }; + struct RARProgramCode *prog; + uint32_t instrcount = 0; + uint8_t xor; + size_t i; + + xor = 0; + for (i = 1; i < length; i++) + xor ^= bytes[i]; + if (!length || xor != bytes[0]) + return NULL; + + br.bytes = bytes; + br.length = length; + br.offset = 1; + + prog = calloc(1, sizeof(*prog)); + if (!prog) + return NULL; + prog->prog = 
RARCreateProgram(); + if (!prog->prog) { + rar_delete_program(prog); + return NULL; + } + prog->fingerprint = ar_crc32(0, bytes, length) | ((uint64_t)length << 32); + + if (br_bits(&br, 1)) { + prog->staticdatalen = br_next_rarvm_number(&br) + 1; + prog->staticdata = malloc(prog->staticdatalen); + if (!prog->staticdata) { + rar_delete_program(prog); + return NULL; + } + for (i = 0; i < prog->staticdatalen; i++) + prog->staticdata[i] = (uint8_t)br_bits(&br, 8); + } + + while (br_available(&br, 8)) { + bool ok = true; + uint8_t instruction = (uint8_t)br_bits(&br, 4); + bool bytemode = false; + int numargs = 0; + uint8_t addrmode1 = 0, addrmode2 = 0; + uint32_t value1 = 0, value2 = 0; + + if ((instruction & 0x08)) + instruction = ((instruction << 2) | (uint8_t)br_bits(&br, 2)) - 24; + if (RARInstructionHasByteMode(instruction)) + bytemode = br_bits(&br, 1) != 0; + ok = RARProgramAddInstr(prog->prog, instruction, bytemode); + numargs = NumberOfRARInstructionOperands(instruction); + if (ok && numargs >= 1) + ok = rar_parse_operand(&br, instruction, bytemode, instrcount, &addrmode1, &value1); + if (ok && numargs == 2) + ok = rar_parse_operand(&br, instruction, bytemode, (uint32_t)-1, &addrmode2, &value2); + if (ok) + ok = RARSetLastInstrOperands(prog->prog, addrmode1, value1, addrmode2, value2); + if (!ok) { + warn("Invalid RAR program instruction"); + rar_delete_program(prog); + return NULL; + } + instrcount++; + } + + if (!RARIsProgramTerminated(prog->prog)) { + if (!RARProgramAddInstr(prog->prog, RARRetInstruction, false)) { + rar_delete_program(prog); + return NULL; + } + } + + return prog; +} + +static bool rar_execute_filter_prog(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t newgloballength; + uint32_t globallength = filter->globaldatalen; + if (globallength > RARProgramSystemGlobalSize) + globallength = RARProgramSystemGlobalSize; + memcpy(&vm->memory[RARProgramSystemGlobalAddress], filter->globaldata, globallength); + if 
(filter->prog->staticdata) { + uint32_t staticlength = filter->prog->staticdatalen; + if (staticlength > RARProgramUserGlobalSize - globallength) + staticlength = RARProgramUserGlobalSize - globallength; + memcpy(&vm->memory[RARProgramUserGlobalAddress], filter->prog->staticdata, staticlength); + } + RARSetVirtualMachineRegisters(vm, filter->initialregisters); + + if (!RARExecuteProgram(vm, filter->prog->prog)) { + warn("Error while executing program in RAR VM"); + return false; + } + + newgloballength = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x30); + if (newgloballength > RARProgramUserGlobalSize) + newgloballength = RARProgramUserGlobalSize; + if (newgloballength > 0) { + uint32_t newglobaldatalength = RARProgramSystemGlobalSize + newgloballength; + if (newglobaldatalength > filter->globaldatalen) { + uint8_t *newglobaldata = malloc(newglobaldatalength); + if (!newglobaldata) + return false; + free(filter->globaldata); + filter->globaldata = newglobaldata; + } + filter->globaldatalen = newglobaldatalength; + memcpy(filter->globaldata, &vm->memory[RARProgramSystemGlobalAddress], filter->globaldatalen); + } + else + filter->globaldatalen = 0; + + return true; +} + +static struct RARFilter *rar_create_filter(struct RARProgramCode *prog, const uint8_t *globaldata, uint32_t globaldatalen, uint32_t registers[8], size_t startpos, uint32_t length) +{ + struct RARFilter *filter; + + filter = calloc(1, sizeof(*filter)); + if (!filter) + return NULL; + filter->prog = prog; + filter->globaldatalen = globaldatalen > RARProgramSystemGlobalSize ? 
globaldatalen : RARProgramSystemGlobalSize; + filter->globaldata = calloc(1, filter->globaldatalen); + if (!filter->globaldata) { free(filter); return NULL; } /* release the half-built filter instead of leaking it */ + if (globaldata) + memcpy(filter->globaldata, globaldata, globaldatalen); + if (registers) + memcpy(filter->initialregisters, registers, sizeof(filter->initialregisters)); + filter->blockstartpos = startpos; + filter->blocklength = length; + + return filter; +} + +static void rar_delete_filter(struct RARFilter *filter) /* frees the whole list reachable through ->next, including each node's globaldata */ +{ + while (filter) { + struct RARFilter *next = filter->next; + free(filter->globaldata); + free(filter); + filter = next; + } +} + +static bool rar_execute_filter_delta(struct RARFilter *filter, RARVirtualMachine *vm) /* reads length bytes sequentially from vm->memory[0] and writes running differences, interleaved over numchannels, to vm->memory[length] */ +{ + uint32_t length = filter->initialregisters[4]; + uint32_t numchannels = filter->initialregisters[0]; + uint8_t *src, *dst; + uint32_t i, idx; + + if (length > RARProgramWorkSize / 2) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[length]; + for (i = 0; i < numchannels; i++) { + uint8_t lastbyte = 0; + for (idx = i; idx < length; idx += numchannels) + lastbyte = dst[idx] = lastbyte - *src++; + } + + filter->filteredblockaddress = length; + filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter_e8(struct RARFilter *filter, RARVirtualMachine *vm, size_t pos, bool e9also) /* patches, in place, the 32-bit value following every 0xE8 byte (and 0xE9 when e9also) in the first length bytes of vm->memory */ +{ + uint32_t length = filter->initialregisters[4]; + uint32_t filesize = 0x1000000; + uint32_t i; + + if (length > RARProgramWorkSize || length < 4) + return false; + + for (i = 0; i < length - 4; i++) { /* length >= 4 here, so length - 4 cannot wrap; the former bound (length - 5) wrapped for length == 4 and let i run past vm->memory */ + if (vm->memory[i] == 0xE8 || (e9also && vm->memory[i] == 0xE9)) { + uint32_t currpos = (uint32_t)pos + i + 1; + int32_t address = (int32_t)RARVirtualMachineRead32(vm, i + 1); + if (address < 0 && currpos >= (uint32_t)-address) + RARVirtualMachineWrite32(vm, i + 1, address + filesize); + else if (address >= 0 && (uint32_t)address < filesize) + RARVirtualMachineWrite32(vm, i + 1, address - currpos); + i += 4; /* skip the 4 operand bytes just handled */ + } + } + + filter->filteredblockaddress = 0; +
filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter_rgb(struct RARFilter *filter, RARVirtualMachine *vm) /* decodes blocklength bytes from vm->memory[0] into vm->memory[blocklength] in three interleaved passes, then adds the middle byte of each triple (starting at byteoffset) to its two neighbours */ +{ + uint32_t stride = filter->initialregisters[0]; + uint32_t byteoffset = filter->initialregisters[1]; + uint32_t blocklength = filter->initialregisters[4]; + uint8_t *src, *dst; + uint32_t i, j; + + if (blocklength > RARProgramWorkSize / 2 || stride > blocklength) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[blocklength]; + for (i = 0; i < 3; i++) { + uint8_t byte = 0; + uint8_t *prev = dst + i - stride; + for (j = i; j < blocklength; j += 3) { + if (prev >= dst) { /* only predict from the previous row once it lies inside the output */ + uint32_t delta1 = abs(prev[3] - prev[0]); + uint32_t delta2 = abs(byte - prev[0]); + uint32_t delta3 = abs(prev[3] - prev[0] + byte - prev[0]); + if (delta1 > delta2 || delta1 > delta3) + byte = delta2 <= delta3 ? prev[3] : prev[0]; + } + byte -= *src++; + dst[j] = byte; + prev += 3; + } + } + for (i = byteoffset; blocklength >= 3 && i < blocklength - 2; i += 3) { /* blocklength - 2 wraps for blocklength < 3; without the guard the loop would write far beyond dst */ + dst[i] += dst[i + 1]; + dst[i + 2] += dst[i + 1]; + } + + filter->filteredblockaddress = blocklength; + filter->filteredblocklength = blocklength; + + return true; +} + +static bool rar_execute_filter_audio(struct RARFilter *filter, RARVirtualMachine *vm) +{ + uint32_t length = filter->initialregisters[4]; + uint32_t numchannels = filter->initialregisters[0]; + uint8_t *src, *dst; + uint32_t i, j; + + if (length > RARProgramWorkSize / 2) + return false; + + src = &vm->memory[0]; + dst = &vm->memory[length]; + for (i = 0; i < numchannels; i++) { + struct AudioState state; + memset(&state, 0, sizeof(state)); + for (j = i; j < length; j += numchannels) { + int8_t delta = (int8_t)*src++; + uint8_t predbyte, byte; + int prederror; + state.delta[2] = state.delta[1]; + state.delta[1] = state.lastdelta - state.delta[0]; + state.delta[0] = state.lastdelta; + predbyte = ((8 * state.lastbyte + state.weight[0] * state.delta[0] + state.weight[1] * state.delta[1] + state.weight[2] * state.delta[2]) >> 3) & 0xFF;
+ byte = (predbyte - delta) & 0xFF; + prederror = delta << 3; + state.error[0] += abs(prederror); + state.error[1] += abs(prederror - state.delta[0]); state.error[2] += abs(prederror + state.delta[0]); + state.error[3] += abs(prederror - state.delta[1]); state.error[4] += abs(prederror + state.delta[1]); + state.error[5] += abs(prederror - state.delta[2]); state.error[6] += abs(prederror + state.delta[2]); + state.lastdelta = (int8_t)(byte - state.lastbyte); + dst[j] = state.lastbyte = byte; + if (!(state.count++ & 0x1F)) { + uint8_t k, idx = 0; + for (k = 1; k < 7; k++) { + if (state.error[k] < state.error[idx]) + idx = k; + } + memset(state.error, 0, sizeof(state.error)); + switch (idx) { + case 1: if (state.weight[0] >= -16) state.weight[0]--; break; + case 2: if (state.weight[0] < 16) state.weight[0]++; break; + case 3: if (state.weight[1] >= -16) state.weight[1]--; break; + case 4: if (state.weight[1] < 16) state.weight[1]++; break; + case 5: if (state.weight[2] >= -16) state.weight[2]--; break; + case 6: if (state.weight[2] < 16) state.weight[2]++; break; + } + } + } + } + + filter->filteredblockaddress = length; + filter->filteredblocklength = length; + + return true; +} + +static bool rar_execute_filter(struct RARFilter *filter, RARVirtualMachine *vm, size_t pos) +{ + if (filter->prog->fingerprint == 0x1D0E06077D) + return rar_execute_filter_delta(filter, vm); + if (filter->prog->fingerprint == 0x35AD576887) + return rar_execute_filter_e8(filter, vm, pos, false); + if (filter->prog->fingerprint == 0x393CD7E57E) + return rar_execute_filter_e8(filter, vm, pos, true); + if (filter->prog->fingerprint == 0x951C2C5DC8) + return rar_execute_filter_rgb(filter, vm); + if (filter->prog->fingerprint == 0xD8BC85E701) + return rar_execute_filter_audio(filter, vm); + log("Unknown parsing filter 0x%x%08x", (uint32_t)(filter->prog->fingerprint >> 32), (uint32_t)filter->prog->fingerprint); + + /* XADRAR30Filter.m @executeOnVirtualMachine claims that this is required */ + if 
(filter->prog->globalbackuplen > RARProgramSystemGlobalSize) { + uint8_t *newglobaldata = malloc(filter->prog->globalbackuplen); + if (newglobaldata) { + free(filter->globaldata); + filter->globaldata = newglobaldata; + filter->globaldatalen = filter->prog->globalbackuplen; + memcpy(filter->globaldata, filter->prog->globalbackup, filter->prog->globalbackuplen); + } + } + + filter->initialregisters[6] = (uint32_t)pos; + bw_write32le(&filter->globaldata[0x24], (uint32_t)pos); + bw_write32le(&filter->globaldata[0x28], (uint32_t)((uint64_t)pos >> 32)); + + if (!rar_execute_filter_prog(filter, vm)) + return false; + + filter->filteredblockaddress = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x20) & RARProgramMemoryMask; + filter->filteredblocklength = RARVirtualMachineRead32(vm, RARProgramSystemGlobalAddress + 0x1C) & RARProgramMemoryMask; + if (filter->filteredblockaddress + filter->filteredblocklength >= RARProgramMemorySize) { + filter->filteredblockaddress = filter->filteredblocklength = 0; + return false; + } + + if (filter->globaldatalen > RARProgramSystemGlobalSize) { + uint8_t *newglobalbackup = malloc(filter->globaldatalen); + if (newglobalbackup) { + free(filter->prog->globalbackup); + filter->prog->globalbackup = newglobalbackup; + filter->prog->globalbackuplen = filter->globaldatalen; + memcpy(filter->prog->globalbackup, filter->globaldata, filter->globaldatalen); + } + } + else + filter->prog->globalbackuplen = 0; + + return true; +} + +bool rar_parse_filter(ar_archive_rar *rar, const uint8_t *bytes, uint16_t length, uint8_t flags) +{ + struct ar_archive_rar_uncomp_v3 *uncomp = &rar->uncomp.state.v3; + struct ar_archive_rar_filters *filters = &uncomp->filters; + + struct MemBitReader br = { 0 }; + struct RARProgramCode *prog; + struct RARFilter *filter, **nextfilter; + + uint32_t numprogs, num, blocklength, globaldatalen; + uint8_t *globaldata; + size_t blockstartpos; + uint32_t registers[8] = { 0 }; + uint32_t i; + + br.bytes = bytes; + 
br.length = length; + + numprogs = 0; + for (prog = filters->progs; prog; prog = prog->next) + numprogs++; + + if ((flags & 0x80)) { + num = br_next_rarvm_number(&br); + if (num == 0) { + rar_delete_filter(filters->stack); + filters->stack = NULL; + rar_delete_program(filters->progs); + filters->progs = NULL; + } + else + num--; + if (num > numprogs) { + warn("Invalid program number"); + return false; + } + filters->lastfilternum = num; + } + else + num = filters->lastfilternum; + + prog = filters->progs; + for (i = 0; i < num; i++) + prog = prog->next; + if (prog) + prog->usagecount++; + + blockstartpos = br_next_rarvm_number(&br) + (size_t)lzss_position(&rar->uncomp.lzss); + if ((flags & 0x40)) + blockstartpos += 258; + if ((flags & 0x20)) + blocklength = br_next_rarvm_number(&br); + else + blocklength = prog ? prog->oldfilterlength : 0; + + registers[3] = RARProgramSystemGlobalAddress; + registers[4] = blocklength; + registers[5] = prog ? prog->usagecount : 0; + registers[7] = RARProgramMemorySize; + + if ((flags & 0x10)) { + uint8_t mask = (uint8_t)br_bits(&br, 7); + for (i = 0; i < 7; i++) { + if ((mask & (1 << i))) + registers[i] = br_next_rarvm_number(&br); + } + } + + if (!prog) { + uint32_t len = br_next_rarvm_number(&br); + uint8_t *bytecode; + struct RARProgramCode **next; + + if (len == 0 || len > 0x10000) { + warn("Invalid RARVM bytecode length"); + return false; + } + bytecode = malloc(len); + if (!bytecode) + return false; + for (i = 0; i < len; i++) + bytecode[i] = (uint8_t)br_bits(&br, 8); + prog = rar_compile_program(bytecode, len); + if (!prog) { + free(bytecode); + return false; + } + free(bytecode); + next = &filters->progs; + while (*next) + next = &(*next)->next; + *next = prog; + } + prog->oldfilterlength = blocklength; + + globaldata = NULL; + globaldatalen = 0; + if ((flags & 0x08)) { + globaldatalen = br_next_rarvm_number(&br); + if (globaldatalen > RARProgramUserGlobalSize) { + warn("Invalid RARVM data length"); + return false; + } + 
globaldata = malloc(globaldatalen + RARProgramSystemGlobalSize); + if (!globaldata) + return false; + for (i = 0; i < globaldatalen; i++) + globaldata[i + RARProgramSystemGlobalSize] = (uint8_t)br_bits(&br, 8); + } + + if (br.at_eof) { + free(globaldata); + return false; + } + + filter = rar_create_filter(prog, globaldata, globaldatalen, registers, blockstartpos, blocklength); + free(globaldata); + if (!filter) + return false; + + for (i = 0; i < 7; i++) + bw_write32le(&filter->globaldata[i * 4], registers[i]); + bw_write32le(&filter->globaldata[0x1C], blocklength); + bw_write32le(&filter->globaldata[0x20], 0); + bw_write32le(&filter->globaldata[0x2C], prog->usagecount); + + nextfilter = &filters->stack; + while (*nextfilter) + nextfilter = &(*nextfilter)->next; + *nextfilter = filter; + + if (!filters->stack->next) + filters->filterstart = blockstartpos; + + return true; +} + +bool rar_run_filters(ar_archive_rar *rar) +{ + struct ar_archive_rar_filters *filters = &rar->uncomp.state.v3.filters; + struct RARFilter *filter = filters->stack; + size_t start = filters->filterstart; + size_t end = start + filter->blocklength; + uint32_t lastfilteraddress; + uint32_t lastfilterlength; + + filters->filterstart = SIZE_MAX; + end = (size_t)rar_expand(rar, end); + if (end != start + filter->blocklength) { + warn("Failed to expand the expected amout of bytes"); + return false; + } + + if (!filters->vm) { + filters->vm = calloc(1, sizeof(*filters->vm)); + if (!filters->vm) + return false; + } + + lzss_copy_bytes_from_window(&rar->uncomp.lzss, filters->vm->memory, start, filter->blocklength); + if (!rar_execute_filter(filter, filters->vm, rar->progress.bytes_done)) { + warn("Failed to execute parsing filter"); + return false; + } + + lastfilteraddress = filter->filteredblockaddress; + lastfilterlength = filter->filteredblocklength; + filters->stack = filter->next; + filter->next = NULL; + rar_delete_filter(filter); + + while ((filter = filters->stack) != NULL && 
filter->blockstartpos == filters->filterstart && filter->blocklength == lastfilterlength) { + memmove(&filters->vm->memory[0], &filters->vm->memory[lastfilteraddress], lastfilterlength); + if (!rar_execute_filter(filter, filters->vm, rar->progress.bytes_done)) { + warn("Failed to execute parsing filter"); + return false; + } + + lastfilteraddress = filter->filteredblockaddress; + lastfilterlength = filter->filteredblocklength; + filters->stack = filter->next; + filter->next = NULL; + rar_delete_filter(filter); + } + + if (filters->stack) { + if (filters->stack->blockstartpos < end) { + warn("Bad filter order"); + return false; + } + filters->filterstart = filters->stack->blockstartpos; + } + + filters->lastend = end; + filters->bytes = &filters->vm->memory[lastfilteraddress]; + filters->bytes_ready = lastfilterlength; + + return true; +} + +void rar_clear_filters(struct ar_archive_rar_filters *filters) +{ + rar_delete_filter(filters->stack); + rar_delete_program(filters->progs); + free(filters->vm); +} diff --git a/cut-n-paste/unarr/rar/huffman-rar.c b/cut-n-paste/unarr/rar/huffman-rar.c new file mode 100644 index 00000000..c77eed93 --- /dev/null +++ b/cut-n-paste/unarr/rar/huffman-rar.c @@ -0,0 +1,142 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). + License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADPrefixCode.m */ + +#include "rar.h" + +bool rar_new_node(struct huffman_code *code) +{ + if (!code->tree) { + code->minlength = INT_MAX; + code->maxlength = INT_MIN; + } + if (code->numentries + 1 >= code->capacity) { + /* in my small file sample, 1024 is the value needed most often */ + int new_capacity = code->capacity ? 
code->capacity * 2 : 1024; + void *new_tree = calloc(new_capacity, sizeof(*code->tree)); + if (!new_tree) { + warn("OOM during decompression"); + return false; + } + if (code->tree) memcpy(new_tree, code->tree, code->capacity * sizeof(*code->tree)); /* nothing to copy before the first allocation; memcpy must not be handed a null source even for size 0 */ + free(code->tree); + code->tree = new_tree; + code->capacity = new_capacity; + } + code->tree[code->numentries].branches[0] = -1; + code->tree[code->numentries].branches[1] = -2; + code->numentries++; + return true; +} + +bool rar_add_value(struct huffman_code *code, int value, int codebits, int length) +{ + int lastnode, bitpos, bit; + + free(code->table); + code->table = NULL; + + if (length > code->maxlength) + code->maxlength = length; + if (length < code->minlength) + code->minlength = length; + + lastnode = 0; + for (bitpos = length - 1; bitpos >= 0; bitpos--) { + bit = (codebits >> bitpos) & 1; + if (rar_is_leaf_node(code, lastnode)) { + warn("Invalid data in bitstream"); /* prefix found */ + return false; + } + if (code->tree[lastnode].branches[bit] < 0) { + if (!rar_new_node(code)) + return false; + code->tree[lastnode].branches[bit] = code->numentries - 1; + } + lastnode = code->tree[lastnode].branches[bit]; + } + + if (code->tree[lastnode].branches[0] != -1 || code->tree[lastnode].branches[1] != -2) { + warn("Invalid data in bitstream"); /* prefix found */ + return false; + } + code->tree[lastnode].branches[0] = code->tree[lastnode].branches[1] = value; + return true; +} + +bool rar_create_code(struct huffman_code *code, uint8_t *lengths, int numsymbols) +{ + int symbolsleft = numsymbols; + int codebits = 0; + int i, j; + + if (!rar_new_node(code)) + return false; + + for (i = 1; i <= 0x0F; i++) { + for (j = 0; j < numsymbols; j++) { + if (lengths[j] != i) + continue; + if (!rar_add_value(code, j, codebits, i)) + return false; + if (--symbolsleft <= 0) + return true; + codebits++; + } + codebits <<= 1; + } + return true; +} + +static bool rar_make_table_rec(struct huffman_code *code, int node, int offset, int depth, int
maxdepth) +{ + int currtablesize = 1 << (maxdepth - depth); + + if (node < 0 || code->numentries <= node) { + warn("Invalid data in bitstream"); /* invalid location to Huffman tree specified */ + return false; + } + + if (rar_is_leaf_node(code, node)) { + int i; + for (i = 0; i < currtablesize; i++) { + code->table[offset + i].length = depth; + code->table[offset + i].value = code->tree[node].branches[0]; + } + } + else if (depth == maxdepth) { + code->table[offset].length = maxdepth + 1; + code->table[offset].value = node; + } + else { + if (!rar_make_table_rec(code, code->tree[node].branches[0], offset, depth + 1, maxdepth)) + return false; + if (!rar_make_table_rec(code, code->tree[node].branches[1], offset + currtablesize / 2, depth + 1, maxdepth)) + return false; + } + return true; +} + +bool rar_make_table(struct huffman_code *code) +{ + if (code->minlength <= code->maxlength && code->maxlength <= 10) + code->tablesize = code->maxlength; + else + code->tablesize = 10; + + code->table = calloc(1ULL << code->tablesize, sizeof(*code->table)); + if (!code->table) { + warn("OOM during decompression"); + return false; + } + + return rar_make_table_rec(code, 0, 0, 0, code->tablesize); +} + +void rar_free_code(struct huffman_code *code) +{ + free(code->tree); + free(code->table); + memset(code, 0, sizeof(*code)); +} diff --git a/cut-n-paste/unarr/rar/lzss.h b/cut-n-paste/unarr/rar/lzss.h new file mode 100644 index 00000000..580fe4c5 --- /dev/null +++ b/cut-n-paste/unarr/rar/lzss.h @@ -0,0 +1,88 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+   License: LGPLv3 */
+
+/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/LZSS.h */
+
+#ifndef rar_lzss_h
+#define rar_lzss_h
+
+#include <stdbool.h> /* bool */
+#include <stdint.h>  /* uint8_t, int64_t */
+#include <stdlib.h>  /* malloc, free */
+#include <string.h>  /* memcpy, memset */
+
+#if defined(_MSC_VER) && !defined(inline)
+#define inline __inline
+#endif
+
+typedef struct {
+    uint8_t *window;  /* ring buffer holding mask + 1 bytes */
+    int mask;         /* window size - 1; positions are wrapped with "& mask" */
+    int64_t position; /* advanced by every emitted literal or match */
+} LZSS;
+
+static inline int64_t lzss_position(LZSS *self) { return self->position; }
+
+static inline int lzss_mask(LZSS *self) { return self->mask; }
+
+static inline int lzss_size(LZSS *self) { return self->mask + 1; }
+
+static inline uint8_t *lzss_window_pointer(LZSS *self) { return self->window; }
+
+static inline int lzss_offset_for_position(LZSS *self, int64_t pos) { return (int)(pos & self->mask); }
+
+static inline uint8_t *lzss_window_pointer_for_position(LZSS *self, int64_t pos) { return &self->window[lzss_offset_for_position(self, pos)]; }
+
+static inline int lzss_current_window_offset(LZSS *self) { return lzss_offset_for_position(self, self->position); }
+
+static inline uint8_t *lzss_current_window_pointer(LZSS *self) { return lzss_window_pointer_for_position(self, self->position); }
+
+static inline int64_t lzss_next_window_edge_after_position(LZSS *self, int64_t pos) { return (pos + lzss_size(self)) & ~(int64_t)lzss_mask(self); }
+
+static inline int64_t lzss_next_window_edge(LZSS *self) { return lzss_next_window_edge_after_position(self, self->position); }
+
+static inline uint8_t lzss_get_byte_from_window(LZSS *self, int64_t pos) { return *lzss_window_pointer_for_position(self, pos); }
+
+static inline void lzss_emit_literal(LZSS *self, uint8_t literal) {
+    /* self->window[(self->position & self->mask)] = literal; */
+    *lzss_current_window_pointer(self) = literal;
+    self->position++;
+}
+
+static inline void lzss_emit_match(LZSS *self, int offset, int length) {
+    int windowoffs = lzss_current_window_offset(self);
+    int i;
+    for (i = 0; i < length; i++) {
+        self->window[(windowoffs + i) & lzss_mask(self)]
= self->window[(windowoffs + i - offset) & lzss_mask(self)]; + } + self->position += length; +} + +static inline void lzss_copy_bytes_from_window(LZSS *self, uint8_t *buffer, int64_t startpos, int length) { + int windowoffs = lzss_offset_for_position(self, startpos); + int firstpart = lzss_size(self) - windowoffs; + if (length <= firstpart) { + /* Request fits inside window */ + memcpy(buffer, &self->window[windowoffs], length); + } + else { + /* Request wraps around window */ + memcpy(buffer, &self->window[windowoffs], firstpart); + memcpy(buffer + firstpart, &self->window[0], length - firstpart); + } +} + +static inline bool lzss_initialize(LZSS *self, int windowsize) { + self->window = malloc(windowsize); + if (!self->window) + return false; + + self->mask = windowsize - 1; /* Assume windows are power-of-two sized! */ + memset(self->window, 0, lzss_size(self)); + self->position = 0; + return true; +} + +static inline void lzss_cleanup(LZSS *self) { free(self->window); } + +#endif diff --git a/cut-n-paste/unarr/rar/parse-rar.c b/cut-n-paste/unarr/rar/parse-rar.c new file mode 100644 index 00000000..f41534c6 --- /dev/null +++ b/cut-n-paste/unarr/rar/parse-rar.c @@ -0,0 +1,236 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+   License: LGPLv3 */
+
+/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */
+
+#include "rar.h"
+
+static inline uint8_t uint8le(unsigned char *data) { return data[0]; }
+static inline uint16_t uint16le(unsigned char *data) { return data[0] | data[1] << 8; }
+static inline uint32_t uint32le(unsigned char *data) { return (uint32_t)data[0] | (uint32_t)data[1] << 8 | (uint32_t)data[2] << 16 | (uint32_t)data[3] << 24; } /* widen first: shifting a promoted int into the sign bit is undefined */
+
+bool rar_parse_header(ar_archive *ar, struct rar_header *header)
+{
+    unsigned char header_data[7]; /* crc(2) type(1) flags(2) size(2), decoded below */
+    size_t read = ar_read(ar->stream, header_data, sizeof(header_data));
+    if (read == 0) { /* nothing left to read: report end of archive, not an error */
+        ar->at_eof = true;
+        return false;
+    }
+    if (read < sizeof(header_data))
+        return false;
+
+    header->crc = uint16le(header_data + 0);
+    header->type = uint8le(header_data + 2);
+    header->flags = uint16le(header_data + 3);
+    header->size = uint16le(header_data + 5);
+
+    header->datasize = 0;
+    if ((header->flags & LHD_LONG_BLOCK) || header->type == TYPE_FILE_ENTRY) { /* these blocks carry a 32-bit data size */
+        unsigned char size_data[4];
+        if (!(header->flags & LHD_LONG_BLOCK))
+            log("File header without LHD_LONG_BLOCK set");
+        read += ar_read(ar->stream, size_data, sizeof(size_data));
+        if (read < sizeof(header_data) + sizeof(size_data))
+            return false;
+        header->datasize = uint32le(size_data);
+    }
+
+    if (header->size < read) { /* the declared size must cover at least the bytes consumed here */
+        warn("Invalid header size %d", header->size);
+        return false;
+    }
+
+    return true;
+}
+
+bool rar_check_header_crc(ar_archive *ar)
+{
+    unsigned char buffer[256];
+    uint16_t crc16, size;
+    uint32_t crc32;
+
+    if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
+        return false;
+    if (ar_read(ar->stream, buffer, 7) != 7)
+        return false;
+
+    crc16 = uint16le(buffer + 0);
+    size = uint16le(buffer + 5);
+    if (size < 7)
+        return false;
+    size -= 7;
+
+    crc32 = ar_crc32(0, buffer + 2, 5);
+    while (size > 0) {
+        if (ar_read(ar->stream, buffer, smin(size, sizeof(buffer))) != smin(size, sizeof(buffer)))
+            return false;
+        crc32 = ar_crc32(crc32, buffer, smin(size, sizeof(buffer)));
+        size -=
(uint16_t)smin(size, sizeof(buffer));
+    }
+    return (crc32 & 0xFFFF) == crc16;
+}
+
+bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry)
+{
+    unsigned char data[21]; /* fixed-size part of the entry header, decoded field by field below */
+    if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data))
+        return false;
+
+    entry->size = uint32le(data + 0);
+    entry->os = uint8le(data + 4);
+    entry->crc = uint32le(data + 5);
+    entry->dosdate = uint32le(data + 9);
+    entry->version = uint8le(data + 13);
+    entry->method = uint8le(data + 14);
+    entry->namelen = uint16le(data + 15);
+    entry->attrs = uint32le(data + 17);
+    if ((header->flags & LHD_LARGE)) {
+        unsigned char more_data[8]; /* high 32 bits of the packed size, then of the unpacked size */
+        if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data))
+            return false;
+        header->datasize += (uint64_t)uint32le(more_data + 0) << 32;
+        entry->size += (uint64_t)uint32le(more_data + 4) << 32;
+    }
+    if (!ar_skip(rar->super.stream, entry->namelen)) /* the name is decoded lazily by rar_get_name */
+        return false;
+    if ((header->flags & LHD_SALT)) {
+        log("Skipping LHD_SALT");
+        ar_skip(rar->super.stream, 8);
+    }
+
+    rar->entry.version = entry->version;
+    rar->entry.method = entry->method;
+    rar->entry.crc = entry->crc;
+    rar->entry.header_size = header->size;
+    rar->entry.solid = entry->version < 20 ?
(rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID);
+    free(rar->entry.name);
+    rar->entry.name = NULL;
+
+    return true;
+}
+
+/* this seems to be what RAR considers "Unicode" */
+static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len)
+{
+#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0)
+
+    uint8_t highbyte, flagbyte, flagbits, length, i; uint16_t size; /* size indexes data[0..len), so it must be as wide as len */
+    const uint8_t *in = (uint8_t *)data + strlen(data) + 1; /* encoded part starts after the NUL-terminated plain name */
+    const uint8_t *end_in = (uint8_t *)data + len;
+    char *str = calloc(len + 1, 3); /* output budget: 3 bytes per input byte, plus terminator */
+    char *out = str;
+    char *end_out = str + len * 3;
+
+    if (!str)
+        return NULL;
+    if (end_in - in <= 1) { /* no usable encoded part: return the raw bytes unchanged */
+        memcpy(str, data, len);
+        return str;
+    }
+
+    highbyte = *in++;
+    flagbyte = 0;
+    flagbits = 0;
+    size = 0;
+
+    while (in < end_in && out < end_out) {
+        if (flagbits == 0) { /* each flag byte holds four 2-bit opcodes, consumed from the top */
+            flagbyte = *in++;
+            flagbits = 8;
+        }
+        flagbits -= 2;
+        switch ((flagbyte >> flagbits) & 3) {
+        case 0: /* one byte used as the code point */
+            Check(in + 1 <= end_in);
+            out += ar_conv_rune_to_utf8(*in++, out, end_out - out);
+            size++;
+            break;
+        case 1: /* one low byte combined with the shared high byte */
+            Check(in + 1 <= end_in);
+            out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out);
+            size++;
+            break;
+        case 2: /* explicit 16-bit little-endian code unit */
+            Check(in + 2 <= end_in);
+            out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out);
+            in += 2;
+            size++;
+            break;
+        case 3: /* run copied from the plain name, optionally corrected */
+            Check(in < end_in && in + ((*in & 0x80) ? 2 : 1) <= end_in); /* length byte, plus the correction byte when bit 7 is set */
+            length = *in++;
+            if ((length & 0x80)) {
+                uint8_t correction = *in++;
+                for (i = 0; i < (length & 0x7F) + 2; i++) {
+                    Check(size < len);
+                    out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | ((data[size] + correction) & 0xFF), out, end_out - out); /* keep the corrected low byte within 8 bits */
+                    size++;
+                }
+            }
+            else {
+                for (i = 0; i < (length & 0x7F) + 2; i++) {
+                    Check(size < len);
+                    out += ar_conv_rune_to_utf8(data[size], out, end_out - out);
+                    size++;
+                }
+            }
+            break;
+        }
+    }
+
+    return str;
+
+#undef Check
+}
+
+const char *rar_get_name(ar_archive *ar)
+{
+    ar_archive_rar *rar = (ar_archive_rar *)ar;
+    if (!rar->entry.name) {
+        unsigned char data[21];
+
uint16_t namelen; + char *name; + + struct rar_header header; + if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET)) + return NULL; + if (!rar_parse_header(ar, &header)) + return NULL; + if (ar_read(ar->stream, data, sizeof(data)) != sizeof(data)) + return NULL; + if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8)) + return NULL; + + namelen = uint16le(data + 15); + name = malloc(namelen + 1); + if (!name || ar_read(ar->stream, name, namelen) != namelen) { + free(name); + return NULL; + } + name[namelen] = '\0'; + + if (!(header.flags & LHD_UNICODE)) { + rar->entry.name = ar_conv_dos_to_utf8(name); + free(name); + } + else if (namelen == strlen(name)) { + rar->entry.name = name; + } + else { + rar->entry.name = rar_conv_unicode_to_utf8(name, namelen); + free(name); + } + /* normalize path separators */ + if (rar->entry.name) { + char *p = rar->entry.name; + while ((p = strchr(p, '\\')) != NULL) { + *p = '/'; + } + } + + if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET)) + warn("Couldn't seek back to the end of the entry header"); + } + return rar->entry.name; +} diff --git a/cut-n-paste/unarr/rar/rar.c b/cut-n-paste/unarr/rar/rar.c new file mode 100644 index 00000000..d03b2da4 --- /dev/null +++ b/cut-n-paste/unarr/rar/rar.c @@ -0,0 +1,254 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#include "rar.h" + +static void rar_close(ar_archive *ar) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + free(rar->entry.name); + rar_clear_uncompress(&rar->uncomp); +} + +static bool rar_parse_entry(ar_archive *ar, off64_t offset) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + struct rar_header header; + struct rar_entry entry; + bool out_of_order = offset != ar->entry_offset_next; + + if (!ar_seek(ar->stream, offset, SEEK_SET)) { + warn("Couldn't seek to offset %" PRIi64, offset); + return false; + } + + for (;;) { + ar->entry_offset = ar_tell(ar->stream); + ar->entry_size_uncompressed = 0; + + if (!rar_parse_header(ar, &header)) + return false; + + ar->entry_offset_next = ar->entry_offset + header.size + header.datasize; + if (ar->entry_offset_next < ar->entry_offset + header.size) { + warn("Integer overflow due to overly large data size"); + return false; + } + + switch (header.type) { + case TYPE_MAIN_HEADER: + if ((header.flags & MHD_PASSWORD)) { + warn("Encrypted archives aren't supported"); + return false; + } + ar_skip(ar->stream, 6 /* reserved data */); + if ((header.flags & MHD_ENCRYPTVER)) { + log("MHD_ENCRYPTVER is set"); + ar_skip(ar->stream, 1); + } + if ((header.flags & MHD_COMMENT)) + log("MHD_COMMENT is set"); + if (ar_tell(ar->stream) - ar->entry_offset > header.size) { + warn("Invalid RAR header size: %d", header.size); + return false; + } + rar->archive_flags = header.flags; + break; + + case TYPE_FILE_ENTRY: + if (!rar_parse_header_entry(rar, &header, &entry)) + return false; + if ((header.flags & LHD_PASSWORD)) + warn("Encrypted entries will fail to uncompress"); + if ((header.flags & LHD_DIRECTORY) == LHD_DIRECTORY) { + if (header.datasize == 0) { + log("Skipping directory entry \"%s\"", rar_get_name(ar)); + break; + } + warn("Can't skip directory entries containing data"); + } + if ((header.flags & (LHD_SPLIT_BEFORE | LHD_SPLIT_AFTER))) + warn("Splitting files isn't really supported"); + 
ar->entry_size_uncompressed = (size_t)entry.size; + ar->entry_filetime = ar_conv_dosdate_to_filetime(entry.dosdate); + if (!rar->entry.solid || rar->entry.method == METHOD_STORE || out_of_order) { + rar_clear_uncompress(&rar->uncomp); + memset(&rar->solid, 0, sizeof(rar->solid)); + } + else { + br_clear_leftover_bits(&rar->uncomp); + } + + rar->solid.restart = rar->entry.solid && (out_of_order || !rar->solid.part_done); + rar->solid.part_done = !ar->entry_size_uncompressed; + rar->progress.data_left = (size_t)header.datasize; + rar->progress.bytes_done = 0; + rar->progress.crc = 0; + + /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */ + if (!rar_check_header_crc(ar)) + warn("Invalid header checksum @%" PRIi64, ar->entry_offset); + if (ar_tell(ar->stream) != ar->entry_offset + rar->entry.header_size) { + warn("Couldn't seek to offset %" PRIi64, ar->entry_offset + rar->entry.header_size); + return false; + } + return true; + + case TYPE_NEWSUB: + log("Skipping newsub header @%" PRIi64, ar->entry_offset); + break; + + case TYPE_END_OF_ARCHIVE: + ar->at_eof = true; + return false; + + default: + log("Unknown RAR header type %02x", header.type); + break; + } + + /* TODO: CRC checks don't always hold (claim in XADRARParser.m @readBlockHeader) */ + if (!rar_check_header_crc(ar)) + warn("Invalid header checksum @%" PRIi64, ar->entry_offset); + if (!ar_seek(ar->stream, ar->entry_offset_next, SEEK_SET)) { + warn("Couldn't seek to offset %" PRIi64, ar->entry_offset_next); + return false; + } + } +} + +static bool rar_copy_stored(ar_archive_rar *rar, void *buffer, size_t count) +{ + if (count > rar->progress.data_left) { + warn("Unexpected EOS in stored data"); + return false; + } + if (ar_read(rar->super.stream, buffer, count) != count) { + warn("Unexpected EOF in stored data"); + return false; + } + rar->progress.data_left -= count; + rar->progress.bytes_done += count; + return true; +} + +static bool rar_restart_solid(ar_archive *ar) +{ + 
ar_archive_rar *rar = (ar_archive_rar *)ar; + off64_t current_offset = ar->entry_offset; + log("Restarting decompression for solid entry"); + if (!ar_parse_entry_at(ar, ar->entry_offset_first)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + while (ar->entry_offset < current_offset) { + size_t size = ar->entry_size_uncompressed; + rar->solid.restart = false; + while (size > 0) { + unsigned char buffer[1024]; + size_t count = smin(size, sizeof(buffer)); + if (!ar_entry_uncompress(ar, buffer, count)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + size -= count; + } + if (!ar_parse_entry(ar)) { + ar_parse_entry_at(ar, current_offset); + return false; + } + } + rar->solid.restart = false; + return true; +} + +static bool rar_uncompress(ar_archive *ar, void *buffer, size_t count) +{ + ar_archive_rar *rar = (ar_archive_rar *)ar; + if (count > ar->entry_size_uncompressed - rar->progress.bytes_done) { + warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - rar->progress.bytes_done, count); + return false; + } + if (rar->entry.method == METHOD_STORE) { + if (!rar_copy_stored(rar, buffer, count)) + return false; + } + else if (rar->entry.method == METHOD_FASTEST || rar->entry.method == METHOD_FAST || + rar->entry.method == METHOD_NORMAL || rar->entry.method == METHOD_GOOD || + rar->entry.method == METHOD_BEST) { + if (rar->solid.restart && !rar_restart_solid(ar)) { + warn("Failed to produce the required solid decompression state"); + return false; + } + if (!rar_uncompress_part(rar, buffer, count)) + return false; + } + else { + warn("Unknown compression method %#02x", rar->entry.method); + return false; + } + + rar->progress.crc = ar_crc32(rar->progress.crc, buffer, count); + if (rar->progress.bytes_done < ar->entry_size_uncompressed) + return true; + if (rar->progress.data_left) + log("Compressed block has more data than required"); + rar->solid.part_done = true; + rar->solid.size_total += 
rar->progress.bytes_done;
+    if (rar->progress.crc != rar->entry.crc) {
+        warn("Checksum of extracted data doesn't match");
+        return false;
+    }
+    return true;
+}
+
+ar_archive *ar_open_rar_archive(ar_stream *stream)
+{
+    char signature[FILE_SIGNATURE_SIZE];
+    if (!ar_seek(stream, 0, SEEK_SET))
+        return NULL;
+    if (ar_read(stream, signature, sizeof(signature)) != sizeof(signature))
+        return NULL;
+    if (memcmp(signature, "Rar!\x1A\x07\x00", sizeof(signature)) != 0) { /* anything else is rejected; known variants get a specific warning */
+        if (memcmp(signature, "Rar!\x1A\x07\x01", sizeof(signature)) == 0)
+            warn("RAR 5 format isn't supported");
+        else if (memcmp(signature, "RE~^", 4) == 0)
+            warn("Ancient RAR format isn't supported");
+        else if (memcmp(signature, "MZ", 2) == 0 || memcmp(signature, "\x7F\x45LF", 4) == 0)
+            warn("SFX archives aren't supported");
+        return NULL;
+    }
+
+    return ar_open_archive(stream, sizeof(ar_archive_rar), rar_close, rar_parse_entry, rar_get_name, rar_uncompress, NULL, FILE_SIGNATURE_SIZE);
+}
+
+ar_archive *ar_open_rar_archive_with_error(ar_stream *stream,
+                                           ArArchiveError *error_code)
+{
+    char signature[FILE_SIGNATURE_SIZE];
+    ar_archive *ret;
+
+    if (!ar_seek(stream, 0, SEEK_SET)) {
+        *error_code = AR_ARCHIVE_ERROR_UNKNOWN;
+        return NULL;
+    }
+    if (ar_read(stream, signature, sizeof(signature)) != sizeof(signature)) {
+        *error_code = AR_ARCHIVE_ERROR_UNKNOWN;
+        return NULL;
+    }
+    if (memcmp(signature, "Rar!\x1A\x07\x00", sizeof(signature)) != 0) {
+        *error_code = AR_ARCHIVE_ERROR_UNKNOWN; /* default, so every NULL return leaves *error_code set */
+        if (memcmp(signature, "Rar!\x1A\x07\x01", sizeof(signature)) == 0)
+            *error_code = AR_ARCHIVE_ERROR_RAR5;
+        else if (memcmp(signature, "RE~^", 4) == 0)
+            *error_code = AR_ARCHIVE_ERROR_OLDRAR;
+        else if (memcmp(signature, "MZ", 2) == 0 || memcmp(signature, "\x7F\x45LF", 4) == 0)
+            *error_code = AR_ARCHIVE_ERROR_SFX;
+        return NULL;
+    }
+
+    ret = ar_open_archive(stream, sizeof(ar_archive_rar), rar_close, rar_parse_entry, rar_get_name, rar_uncompress, NULL, FILE_SIGNATURE_SIZE);
+    if (!ret)
+        *error_code = AR_ARCHIVE_ERROR_UNKNOWN;
+    return ret;
+}
diff --git
a/cut-n-paste/unarr/rar/rar.h b/cut-n-paste/unarr/rar/rar.h
new file mode 100644
index 00000000..783f9f75
--- /dev/null
+++ b/cut-n-paste/unarr/rar/rar.h
@@ -0,0 +1,252 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+#ifndef rar_rar_h
+#define rar_rar_h
+
+#include "../common/unarr-imp.h"
+
+#include "lzss.h"
+#include "../lzmasdk/Ppmd7.h"
+#include <limits.h> /* INT_MAX / INT_MIN, used by huffman-rar.c */
+
+static inline size_t smin(size_t a, size_t b) { return a < b ? a : b; }
+
+typedef struct ar_archive_rar_s ar_archive_rar;
+
+/***** parse-rar *****/
+
+#define FILE_SIGNATURE_SIZE 7
+
+enum block_types {
+    TYPE_FILE_SIGNATURE = 0x72, TYPE_MAIN_HEADER = 0x73, TYPE_FILE_ENTRY = 0x74,
+    TYPE_NEWSUB = 0x7A, TYPE_END_OF_ARCHIVE = 0x7B,
+};
+
+enum archive_flags {
+    MHD_VOLUME = 1 << 0, MHD_COMMENT = 1 << 1, MHD_LOCK = 1 << 2,
+    MHD_SOLID = 1 << 3, MHD_PACK_COMMENT = 1 << 4, MHD_AV = 1 << 5,
+    MHD_PROTECT = 1 << 6, MHD_PASSWORD = 1 << 7, MHD_FIRSTVOLUME = 1 << 8,
+    MHD_ENCRYPTVER = 1 << 9,
+    MHD_LONG_BLOCK = 1 << 15,
+};
+
+enum entry_flags {
+    LHD_SPLIT_BEFORE = 1 << 0, LHD_SPLIT_AFTER = 1 << 1, LHD_PASSWORD = 1 << 2,
+    LHD_COMMENT = 1 << 3, LHD_SOLID = 1 << 4,
+    LHD_DIRECTORY = (1 << 5) | (1 << 6) | (1 << 7),
+    LHD_LARGE = 1 << 8, LHD_UNICODE = 1 << 9, LHD_SALT = 1 << 10,
+    LHD_VERSION = 1 << 11, LHD_EXTTIME = 1 << 12, LHD_EXTFLAGS = 1 << 13,
+    LHD_LONG_BLOCK = 1 << 15,
+};
+
+enum compression_method {
+    METHOD_STORE = 0x30,
+    METHOD_FASTEST = 0x31, METHOD_FAST = 0x32, METHOD_NORMAL = 0x33,
+    METHOD_GOOD = 0x34, METHOD_BEST = 0x35,
+};
+
+struct rar_header {
+    uint16_t crc;
+    uint8_t type;
+    uint16_t flags;
+    uint16_t size;
+    uint64_t datasize;
+};
+
+struct rar_entry {
+    uint64_t size;
+    uint8_t os;
+    uint32_t crc;
+    uint32_t dosdate;
+    uint8_t version;
+    uint8_t method;
+    uint16_t namelen;
+    uint32_t attrs;
+};
+
+struct ar_archive_rar_entry {
+    uint8_t version;
+    uint8_t method;
+    uint32_t crc;
+    uint16_t header_size;
+    bool solid;
+    char *name;
+};
+
+bool
rar_parse_header(ar_archive *ar, struct rar_header *header); +bool rar_check_header_crc(ar_archive *ar); +bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry); +const char *rar_get_name(ar_archive *ar); + +/***** filter-rar *****/ + +struct RARVirtualMachine; +struct RARProgramCode; +struct RARFilter; + +struct ar_archive_rar_filters { + struct RARVirtualMachine *vm; + struct RARProgramCode *progs; + struct RARFilter *stack; + size_t filterstart; + uint32_t lastfilternum; + size_t lastend; + uint8_t *bytes; + size_t bytes_ready; +}; + +bool rar_parse_filter(ar_archive_rar *rar, const uint8_t *bytes, uint16_t length, uint8_t flags); +bool rar_run_filters(ar_archive_rar *rar); +void rar_clear_filters(struct ar_archive_rar_filters *filters); + +/***** huffman-rar *****/ + +struct huffman_code { + struct { + int branches[2]; + } *tree; + int numentries; + int capacity; + int minlength; + int maxlength; + struct { + int length; + int value; + } *table; + int tablesize; +}; + +bool rar_new_node(struct huffman_code *code); +bool rar_add_value(struct huffman_code *code, int value, int codebits, int length); +bool rar_create_code(struct huffman_code *code, uint8_t *lengths, int numsymbols); +bool rar_make_table(struct huffman_code *code); +void rar_free_code(struct huffman_code *code); + +static inline bool rar_is_leaf_node(struct huffman_code *code, int node) { return code->tree[node].branches[0] == code->tree[node].branches[1]; } + +/***** uncompress-rar *****/ + +#define LZSS_WINDOW_SIZE 0x400000 +#define LZSS_OVERFLOW_SIZE 288 + +#define MAINCODE_SIZE 299 +#define OFFSETCODE_SIZE 60 +#define LOWOFFSETCODE_SIZE 17 +#define LENGTHCODE_SIZE 28 +#define HUFFMAN_TABLE_SIZE MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE + LENGTHCODE_SIZE + +struct ByteReader { + IByteIn super; + ar_archive_rar *rar; +}; + +struct CPpmdRAR_RangeDec { + CPpmd7_RangeDec super; + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteIn *Stream; 
+}; + +struct ar_archive_rar_uncomp_v3 { + struct huffman_code maincode; + struct huffman_code offsetcode; + struct huffman_code lowoffsetcode; + struct huffman_code lengthcode; + uint8_t lengthtable[HUFFMAN_TABLE_SIZE]; + uint32_t lastlength; + uint32_t lastoffset; + uint32_t oldoffset[4]; + uint32_t lastlowoffset; + uint32_t numlowoffsetrepeats; + + bool is_ppmd_block; + int ppmd_escape; + CPpmd7 ppmd7_context; + struct CPpmdRAR_RangeDec range_dec; + struct ByteReader bytein; + + struct ar_archive_rar_filters filters; +}; + +#define MAINCODE_SIZE_20 298 +#define OFFSETCODE_SIZE_20 48 +#define LENGTHCODE_SIZE_20 28 +#define HUFFMAN_TABLE_SIZE_20 4 * 257 + +struct AudioState { + int8_t weight[5]; + int16_t delta[4]; + int8_t lastdelta; + int error[11]; + int count; + uint8_t lastbyte; +}; + +struct ar_archive_rar_uncomp_v2 { + struct huffman_code maincode; + struct huffman_code offsetcode; + struct huffman_code lengthcode; + struct huffman_code audiocode[4]; + uint8_t lengthtable[HUFFMAN_TABLE_SIZE_20]; + uint32_t lastoffset; + uint32_t lastlength; + uint32_t oldoffset[4]; + uint32_t oldoffsetindex; + + bool audioblock; + uint8_t channel; + uint8_t numchannels; + struct AudioState audiostate[4]; + int8_t channeldelta; +}; + +struct ar_archive_rar_uncomp { + uint8_t version; + + LZSS lzss; + size_t bytes_ready; + bool start_new_table; + + union { + struct ar_archive_rar_uncomp_v3 v3; + struct ar_archive_rar_uncomp_v2 v2; + } state; + + struct StreamBitReader { + uint64_t bits; + int available; + bool at_eof; + } br; +}; + +bool rar_uncompress_part(ar_archive_rar *rar, void *buffer, size_t buffer_size); +int64_t rar_expand(ar_archive_rar *rar, int64_t end); +void rar_clear_uncompress(struct ar_archive_rar_uncomp *uncomp); +static inline void br_clear_leftover_bits(struct ar_archive_rar_uncomp *uncomp) { uncomp->br.available &= ~0x07; } + +/***** rar *****/ + +struct ar_archive_rar_progress { + size_t data_left; + size_t bytes_done; + uint32_t crc; +}; + +struct 
ar_archive_rar_solid {
+    size_t size_total;
+    bool part_done;
+    bool restart;
+};
+
+struct ar_archive_rar_s {
+    ar_archive super;
+    uint16_t archive_flags;
+    struct ar_archive_rar_entry entry;
+    struct ar_archive_rar_uncomp uncomp;
+    struct ar_archive_rar_progress progress;
+    struct ar_archive_rar_solid solid;
+};
+
+#endif
diff --git a/cut-n-paste/unarr/rar/rarvm.c b/cut-n-paste/unarr/rar/rarvm.c
new file mode 100644
index 00000000..6f738ec3
--- /dev/null
+++ b/cut-n-paste/unarr/rar/rarvm.c
@@ -0,0 +1,616 @@
+/* Copyright 2015 the unarr project authors (see AUTHORS file).
+   License: LGPLv3 */
+
+/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/RARVirtualMachine.c */
+
+#include "rarvm.h"
+#include "../common/allocator.h"
+
+#include <stdlib.h> /* calloc, free */
+#include <string.h> /* memcpy, memset */
+
+typedef struct RAROpcode_s RAROpcode;
+
+struct RAROpcode_s {
+    uint8_t instruction;
+    uint8_t bytemode; /* bit 0: first operand is byte-sized, bit 1: second operand */
+    uint8_t addressingmode1;
+    uint8_t addressingmode2;
+    uint32_t value1;
+    uint32_t value2;
+};
+
+struct RARProgram_s {
+    RAROpcode *opcodes;
+    uint32_t length;   /* opcodes in use */
+    uint32_t capacity; /* opcodes allocated */
+};
+
+/* Program building */
+
+RARProgram *RARCreateProgram()
+{
+    return calloc(1, sizeof(RARProgram)); /* zeroed: no opcodes, length 0, capacity 0; NULL on OOM */
+}
+
+void RARDeleteProgram(RARProgram *prog)
+{
+    if (prog) /* NULL is accepted: free(NULL) below is a no-op */
+        free(prog->opcodes);
+    free(prog);
+}
+
+bool RARProgramAddInstr(RARProgram *prog, uint8_t instruction, bool bytemode)
+{
+    if (instruction >= RARNumberOfInstructions)
+        return false;
+    if (bytemode && !RARInstructionHasByteMode(instruction))
+        return false;
+    if (prog->length + 1 >= prog->capacity) {
+        /* in my small file sample, 16 is the value needed most often */
+        uint32_t newCapacity = prog->capacity ?
prog->capacity * 4 : 32; + RAROpcode *newCodes = calloc(newCapacity, sizeof(*prog->opcodes)); + if (!newCodes) + return false; + memcpy(newCodes, prog->opcodes, prog->capacity * sizeof(*prog->opcodes)); + free(prog->opcodes); + prog->opcodes = newCodes; + prog->capacity = newCapacity; + } + memset(&prog->opcodes[prog->length], 0, sizeof(prog->opcodes[prog->length])); + prog->opcodes[prog->length].instruction = instruction; + if (instruction == RARMovzxInstruction || instruction == RARMovsxInstruction) + prog->opcodes[prog->length].bytemode = 2; /* second argument only */ + else if (bytemode) + prog->opcodes[prog->length].bytemode = (1 | 2); + else + prog->opcodes[prog->length].bytemode = 0; + prog->length++; + return true; +} + +bool RARSetLastInstrOperands(RARProgram *prog, uint8_t addressingmode1, uint32_t value1, uint8_t addressingmode2, uint32_t value2) +{ + RAROpcode *opcode = &prog->opcodes[prog->length - 1]; + int numoperands; + + if (addressingmode1 >= RARNumberOfAddressingModes || addressingmode2 >= RARNumberOfAddressingModes) + return false; + if (!prog->length || opcode->addressingmode1 || opcode->value1 || opcode->addressingmode2 || opcode->value2) + return false; + + numoperands = NumberOfRARInstructionOperands(opcode->instruction); + if (numoperands == 0) + return true; + + if (addressingmode1 == RARImmediateAddressingMode && RARInstructionWritesFirstOperand(opcode->instruction)) + return false; + opcode->addressingmode1 = addressingmode1; + opcode->value1 = value1; + + if (numoperands == 2) { + if (addressingmode2 == RARImmediateAddressingMode && RARInstructionWritesSecondOperand(opcode->instruction)) + return false; + opcode->addressingmode2 = addressingmode2; + opcode->value2 = value2; + } + + return true; +} + +bool RARIsProgramTerminated(RARProgram *prog) +{ + return prog->length > 0 && RARInstructionIsUnconditionalJump(prog->opcodes[prog->length - 1].instruction); +} + +/* Execution */ + +#define EXTMACRO_BEGIN do { +#ifdef _MSC_VER +#define 
EXTMACRO_END } __pragma(warning(push)) __pragma(warning(disable:4127)) while (0) __pragma(warning(pop)) +#else +#define EXTMACRO_END } while (0) +#endif + +#define CarryFlag 1 +#define ZeroFlag 2 +#define SignFlag 0x80000000 + +#define SignExtend(a) ((uint32_t)((int8_t)(a))) + +static uint32_t _RARGetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode); +static void _RARSetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode, uint32_t data); + +#define GetOperand1() _RARGetOperand(vm, opcode->addressingmode1, opcode->value1, opcode->bytemode & 1) +#define GetOperand2() _RARGetOperand(vm, opcode->addressingmode2, opcode->value2, opcode->bytemode & 2) +#define SetOperand1(data) _RARSetOperand(vm, opcode->addressingmode1, opcode->value1, opcode->bytemode & 1, data) +#define SetOperand2(data) _RARSetOperand(vm, opcode->addressingmode2, opcode->value2, opcode->bytemode & 2, data) + +#define SetFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint32_t result = (res); flags = (result == 0 ? ZeroFlag : (result & SignFlag)) | ((carry) ? CarryFlag : 0); EXTMACRO_END +#define SetByteFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint8_t result = (res); flags = (result == 0 ? ZeroFlag : (SignExtend(result) & SignFlag)) | ((carry) ? 
CarryFlag : 0); EXTMACRO_END +#define SetFlags(res) SetFlagsWithCarry(res, 0) + +#define SetOperand1AndFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint32_t r = (res); SetFlagsWithCarry(r, carry); SetOperand1(r); EXTMACRO_END +#define SetOperand1AndByteFlagsWithCarry(res, carry) EXTMACRO_BEGIN uint8_t r = (res); SetByteFlagsWithCarry(r, carry); SetOperand1(r); EXTMACRO_END +#define SetOperand1AndFlags(res) EXTMACRO_BEGIN uint32_t r = (res); SetFlags(r); SetOperand1(r); EXTMACRO_END + +#define NextInstruction() { opcode++; continue; } +#define Jump(offs) { uint32_t o = (offs); if (o >= prog->length) return false; opcode = &prog->opcodes[o]; continue; } + +bool RARExecuteProgram(RARVirtualMachine *vm, RARProgram *prog) +{ + RAROpcode *opcode = prog->opcodes; + uint32_t flags = 0; + uint32_t op1, op2, carry, i; + uint32_t counter = 0; + + if (!RARIsProgramTerminated(prog)) + return false; + + while ((uint32_t)(opcode - prog->opcodes) < prog->length && counter++ < RARRuntimeMaxInstructions) { + switch (opcode->instruction) { + case RARMovInstruction: + SetOperand1(GetOperand2()); + NextInstruction(); + + case RARCmpInstruction: + op1 = GetOperand1(); + SetFlagsWithCarry(op1 - GetOperand2(), result > op1); + NextInstruction(); + + case RARAddInstruction: + op1 = GetOperand1(); + if (opcode->bytemode) + SetOperand1AndByteFlagsWithCarry((op1 + GetOperand2()) & 0xFF, result < op1); + else + SetOperand1AndFlagsWithCarry(op1 + GetOperand2(), result < op1); + NextInstruction(); + + case RARSubInstruction: + op1 = GetOperand1(); +#if 0 /* apparently not correctly implemented in the RAR VM */ + if (opcode->bytemode) + SetOperand1AndByteFlagsWithCarry((op1 - GetOperand2()) & 0xFF, result > op1); + else +#endif + SetOperand1AndFlagsWithCarry(op1 - GetOperand2(), result > op1); + NextInstruction(); + + case RARJzInstruction: + if ((flags & ZeroFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJnzInstruction: + if (!(flags & ZeroFlag)) + Jump(GetOperand1()); + 
NextInstruction(); + + case RARIncInstruction: + if (opcode->bytemode) + SetOperand1AndFlags((GetOperand1() + 1) & 0xFF); + else + SetOperand1AndFlags(GetOperand1() + 1); + NextInstruction(); + + case RARDecInstruction: + if (opcode->bytemode) + SetOperand1AndFlags((GetOperand1() - 1) & 0xFF); + else + SetOperand1AndFlags(GetOperand1() - 1); + NextInstruction(); + + case RARJmpInstruction: + Jump(GetOperand1()); + + case RARXorInstruction: + SetOperand1AndFlags(GetOperand1() ^ GetOperand2()); + NextInstruction(); + + case RARAndInstruction: + SetOperand1AndFlags(GetOperand1() & GetOperand2()); + NextInstruction(); + + case RAROrInstruction: + SetOperand1AndFlags(GetOperand1() | GetOperand2()); + NextInstruction(); + + case RARTestInstruction: + SetFlags(GetOperand1() & GetOperand2()); + NextInstruction(); + + case RARJsInstruction: + if ((flags & SignFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJnsInstruction: + if (!(flags & SignFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJbInstruction: + if ((flags & CarryFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARJbeInstruction: + if ((flags & (CarryFlag | ZeroFlag))) + Jump(GetOperand1()); + NextInstruction(); + + case RARJaInstruction: + if (!(flags & (CarryFlag | ZeroFlag))) + Jump(GetOperand1()); + NextInstruction(); + + case RARJaeInstruction: + if (!(flags & CarryFlag)) + Jump(GetOperand1()); + NextInstruction(); + + case RARPushInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], GetOperand1()); + NextInstruction(); + + case RARPopInstruction: + SetOperand1(RARVirtualMachineRead32(vm, vm->registers[7])); + vm->registers[7] += 4; + NextInstruction(); + + case RARCallInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], (uint32_t)(opcode - prog->opcodes + 1)); + Jump(GetOperand1()); + + case RARRetInstruction: + if (vm->registers[7] >= RARProgramMemorySize) + return true; + i = 
RARVirtualMachineRead32(vm, vm->registers[7]); + vm->registers[7] += 4; + Jump(i); + + case RARNotInstruction: + SetOperand1(~GetOperand1()); + NextInstruction(); + + case RARShlInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(op1 << op2, ((op1 << (op2 - 1)) & 0x80000000) != 0); + NextInstruction(); + + case RARShrInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(op1 >> op2, ((op1 >> (op2 - 1)) & 1) != 0); + NextInstruction(); + + case RARSarInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1AndFlagsWithCarry(((int32_t)op1) >> op2, ((op1 >> (op2 - 1)) & 1) != 0); + NextInstruction(); + + case RARNegInstruction: + SetOperand1AndFlagsWithCarry(-(int32_t)GetOperand1(), result != 0); + NextInstruction(); + + case RARPushaInstruction: + vm->registers[7] -= 32; + for (i = 0; i < 8; i++) + RARVirtualMachineWrite32(vm, vm->registers[7] + (7 - i) * 4, vm->registers[i]); + NextInstruction(); + + case RARPopaInstruction: + for (i = 0; i < 8; i++) + vm->registers[i] = RARVirtualMachineRead32(vm, vm->registers[7] + (7 - i) * 4); + vm->registers[7] += 32; + NextInstruction(); + + case RARPushfInstruction: + vm->registers[7] -= 4; + RARVirtualMachineWrite32(vm, vm->registers[7], flags); + NextInstruction(); + + case RARPopfInstruction: + flags = RARVirtualMachineRead32(vm, vm->registers[7]); + vm->registers[7] += 4; + NextInstruction(); + + case RARMovzxInstruction: + SetOperand1(GetOperand2()); + NextInstruction(); + + case RARMovsxInstruction: + SetOperand1(SignExtend(GetOperand2())); + NextInstruction(); + + case RARXchgInstruction: + op1 = GetOperand1(); + op2 = GetOperand2(); + SetOperand1(op2); + SetOperand2(op1); + NextInstruction(); + + case RARMulInstruction: + SetOperand1(GetOperand1() * GetOperand2()); + NextInstruction(); + + case RARDivInstruction: + op2 = GetOperand2(); + if (op2 != 0) + SetOperand1(GetOperand1() / op2); + NextInstruction(); + + case 
RARAdcInstruction: + op1 = GetOperand1(); + carry = (flags & CarryFlag); + if (opcode->bytemode) + SetOperand1AndFlagsWithCarry((op1 + GetOperand2() + carry) & 0xFF, result < op1 || (result == op1 && carry)); /* does not correctly set sign bit */ + else + SetOperand1AndFlagsWithCarry(op1 + GetOperand2() + carry, result < op1 || (result == op1 && carry)); + NextInstruction(); + + case RARSbbInstruction: + op1 = GetOperand1(); + carry = (flags & CarryFlag); + if (opcode->bytemode) + SetOperand1AndFlagsWithCarry((op1 - GetOperand2() - carry) & 0xFF, result > op1 || (result == op1 && carry)); /* does not correctly set sign bit */ + else + SetOperand1AndFlagsWithCarry(op1 - GetOperand2() - carry, result > op1 || (result == op1 && carry)); + NextInstruction(); + + case RARPrintInstruction: + /* TODO: ??? */ + NextInstruction(); + } + } + + return false; +} + +/* Memory and register access */ + +static uint32_t _RARRead32(const uint8_t *b) +{ + return ((uint32_t)b[3] << 24) | ((uint32_t)b[2] << 16) | ((uint32_t)b[1] << 8) | (uint32_t)b[0]; +} + +static void _RARWrite32(uint8_t *b, uint32_t n) +{ + b[3] = (n >> 24) & 0xFF; + b[2] = (n >> 16) & 0xFF; + b[1] = (n >> 8) & 0xFF; + b[0] = n & 0xFF; +} + +void RARSetVirtualMachineRegisters(RARVirtualMachine *vm, uint32_t registers[8]) +{ + if (registers) + memcpy(vm->registers, registers, sizeof(vm->registers)); + else + memset(vm->registers, 0, sizeof(vm->registers)); +} + +uint32_t RARVirtualMachineRead32(RARVirtualMachine *vm, uint32_t address) +{ + return _RARRead32(&vm->memory[address & RARProgramMemoryMask]); +} + +void RARVirtualMachineWrite32(RARVirtualMachine *vm, uint32_t address, uint32_t val) +{ + _RARWrite32(&vm->memory[address & RARProgramMemoryMask], val); +} + +uint8_t RARVirtualMachineRead8(RARVirtualMachine *vm, uint32_t address) +{ + return vm->memory[address & RARProgramMemoryMask]; +} + +void RARVirtualMachineWrite8(RARVirtualMachine *vm, uint32_t address, uint8_t val) +{ + vm->memory[address & 
RARProgramMemoryMask] = val; +} + +static uint32_t _RARGetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode) +{ + if (RARRegisterAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterAddressingMode(7)) { + uint32_t result = vm->registers[addressingmode % 8]; + if (bytemode) + result = result & 0xFF; + return result; + } + if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) { + if (bytemode) + return RARVirtualMachineRead8(vm, vm->registers[addressingmode % 8]); + return RARVirtualMachineRead32(vm, vm->registers[addressingmode % 8]); + } + if (RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) { + if (bytemode) + return RARVirtualMachineRead8(vm, value + vm->registers[addressingmode % 8]); + return RARVirtualMachineRead32(vm, value + vm->registers[addressingmode % 8]); + } + if (addressingmode == RARAbsoluteAddressingMode) { + if (bytemode) + return RARVirtualMachineRead8(vm, value); + return RARVirtualMachineRead32(vm, value); + } + /* if (addressingmode == RARImmediateAddressingMode) */ + return value; +} + +static void _RARSetOperand(RARVirtualMachine *vm, uint8_t addressingmode, uint32_t value, bool bytemode, uint32_t data) +{ + if (RARRegisterAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterAddressingMode(7)) { + if (bytemode) + data = data & 0xFF; + vm->registers[addressingmode % 8] = data; + } + else if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) { + if (bytemode) + RARVirtualMachineWrite8(vm, vm->registers[addressingmode % 8], (uint8_t)data); + else + RARVirtualMachineWrite32(vm, vm->registers[addressingmode % 8], data); + } + else if (RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) { + if (bytemode) + RARVirtualMachineWrite8(vm, 
value + vm->registers[addressingmode % 8], (uint8_t)data); + else + RARVirtualMachineWrite32(vm, value + vm->registers[addressingmode % 8], data); + } + else if (addressingmode == RARAbsoluteAddressingMode) { + if (bytemode) + RARVirtualMachineWrite8(vm, value, (uint8_t)data); + else + RARVirtualMachineWrite32(vm, value, data); + } +} + +/* Instruction properties */ + +#define RAR0OperandsFlag 0 +#define RAR1OperandFlag 1 +#define RAR2OperandsFlag 2 +#define RAROperandsFlag 3 +#define RARHasByteModeFlag 4 +#define RARIsUnconditionalJumpFlag 8 +#define RARIsRelativeJumpFlag 16 +#define RARWritesFirstOperandFlag 32 +#define RARWritesSecondOperandFlag 64 +#define RARReadsStatusFlag 128 +#define RARWritesStatusFlag 256 + +static const int InstructionFlags[RARNumberOfInstructions] = { + /*RARMovInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARCmpInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesStatusFlag, + /*RARAddInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARSubInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARJzInstruction*/ RAR1OperandFlag | RARIsUnconditionalJumpFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJnzInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARIncInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARDecInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARJmpInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag, + /*RARXorInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARAndInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RAROrInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | 
RARWritesStatusFlag, + /*RARTestInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesStatusFlag, + /*RARJsInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJnsInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJbInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJbeInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJaInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARJaeInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag | RARReadsStatusFlag, + /*RARPushInstruction*/ RAR1OperandFlag, + /*RARPopInstruction*/ RAR1OperandFlag, + /*RARCallInstruction*/ RAR1OperandFlag | RARIsRelativeJumpFlag, + /*RARRetInstruction*/ RAR0OperandsFlag | RARIsUnconditionalJumpFlag, + /*RARNotInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARShlInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARShrInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARSarInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARNegInstruction*/ RAR1OperandFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARWritesStatusFlag, + /*RARPushaInstruction*/ RAR0OperandsFlag, + /*RARPopaInstruction*/ RAR0OperandsFlag, + /*RARPushfInstruction*/ RAR0OperandsFlag | RARReadsStatusFlag, + /*RARPopfInstruction*/ RAR0OperandsFlag | RARWritesStatusFlag, + /*RARMovzxInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag, + /*RARMovsxInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag, + /*RARXchgInstruction*/ RAR2OperandsFlag | RARWritesFirstOperandFlag | RARWritesSecondOperandFlag | RARHasByteModeFlag, + /*RARMulInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARDivInstruction*/ RAR2OperandsFlag | 
RARHasByteModeFlag | RARWritesFirstOperandFlag, + /*RARAdcInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARReadsStatusFlag | RARWritesStatusFlag, + /*RARSbbInstruction*/ RAR2OperandsFlag | RARHasByteModeFlag | RARWritesFirstOperandFlag | RARReadsStatusFlag | RARWritesStatusFlag, + /*RARPrintInstruction*/ RAR0OperandsFlag +}; + +int NumberOfRARInstructionOperands(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return 0; + return InstructionFlags[instruction] & RAROperandsFlag; +} + +bool RARInstructionHasByteMode(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARHasByteModeFlag)!=0; +} + +bool RARInstructionIsUnconditionalJump(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARIsUnconditionalJumpFlag) != 0; +} + +bool RARInstructionIsRelativeJump(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARIsRelativeJumpFlag) != 0; +} + +bool RARInstructionWritesFirstOperand(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARWritesFirstOperandFlag) != 0; +} + +bool RARInstructionWritesSecondOperand(uint8_t instruction) +{ + if (instruction >= RARNumberOfInstructions) + return false; + return (InstructionFlags[instruction] & RARWritesSecondOperandFlag) != 0; +} + +/* Program debugging */ + +#ifndef NDEBUG +#include <stdio.h> + +static void RARPrintOperand(uint8_t addressingmode, uint32_t value) +{ + if (RARRegisterAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterAddressingMode(7)) + printf("r%d", addressingmode % 8); + else if (RARRegisterIndirectAddressingMode(0) <= addressingmode && addressingmode <= RARRegisterIndirectAddressingMode(7)) + printf("@(r%d)", addressingmode % 8); + else if
(RARIndexedAbsoluteAddressingMode(0) <= addressingmode && addressingmode <= RARIndexedAbsoluteAddressingMode(7)) + printf("@(r%d+$%02x)", addressingmode % 8, value); + else if (addressingmode == RARAbsoluteAddressingMode) + printf("@($%02x)", value); + else if (addressingmode == RARImmediateAddressingMode) + printf("$%02x", value); +} + +void RARPrintProgram(RARProgram *prog) +{ + static const char *instructionNames[RARNumberOfInstructions] = { + "Mov", "Cmp", "Add", "Sub", "Jz", "Jnz", "Inc", "Dec", "Jmp", "Xor", + "And", "Or", "Test", "Js", "Jns", "Jb", "Jbe", "Ja", "Jae", "Push", + "Pop", "Call", "Ret", "Not", "Shl", "Shr", "Sar", "Neg", "Pusha", "Popa", + "Pushf", "Popf", "Movzx", "Movsx", "Xchg", "Mul", "Div", "Adc", "Sbb", "Print", + }; + + uint32_t i; + for (i = 0; i < prog->length; i++) { + RAROpcode *opcode = &prog->opcodes[i]; + int numoperands = NumberOfRARInstructionOperands(opcode->instruction); + printf(" %02x: %s", i, instructionNames[opcode->instruction]); + if (opcode->bytemode) + printf("B"); + if (numoperands >= 1) { + printf(" "); + RARPrintOperand(opcode->addressingmode1, opcode->value1); + } + if (numoperands == 2) { + printf(", "); + RARPrintOperand(opcode->addressingmode2, opcode->value2); + } + printf("\n"); + } +} +#endif diff --git a/cut-n-paste/unarr/rar/rarvm.h b/cut-n-paste/unarr/rar/rarvm.h new file mode 100644 index 00000000..51567a9a --- /dev/null +++ b/cut-n-paste/unarr/rar/rarvm.h @@ -0,0 +1,117 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/RARVirtualMachine.h */ + +#ifndef rar_vm_h +#define rar_vm_h + +#include <stdint.h> +#include <stdbool.h> + +#define RARProgramMemorySize 0x40000 +#define RARProgramMemoryMask (RARProgramMemorySize - 1) +#define RARProgramWorkSize 0x3c000 +#define RARProgramGlobalSize 0x2000 +#define RARProgramSystemGlobalAddress RARProgramWorkSize +#define RARProgramSystemGlobalSize 64 +#define RARProgramUserGlobalAddress (RARProgramSystemGlobalAddress + RARProgramSystemGlobalSize) +#define RARProgramUserGlobalSize (RARProgramGlobalSize - RARProgramSystemGlobalSize) +#define RARRuntimeMaxInstructions 250000000 + +#define RARRegisterAddressingMode(n) (0 + (n)) +#define RARRegisterIndirectAddressingMode(n) (8 + (n)) +#define RARIndexedAbsoluteAddressingMode(n) (16 + (n)) +#define RARAbsoluteAddressingMode 24 +#define RARImmediateAddressingMode 25 +#define RARNumberOfAddressingModes 26 + +typedef struct RARVirtualMachine RARVirtualMachine; + +struct RARVirtualMachine { + uint32_t registers[8]; + uint8_t memory[RARProgramMemorySize + sizeof(uint32_t) /* overflow sentinel */]; +}; + +typedef struct RARProgram_s RARProgram; + +/* Program building */ + +enum { + RARMovInstruction = 0, + RARCmpInstruction = 1, + RARAddInstruction = 2, + RARSubInstruction = 3, + RARJzInstruction = 4, + RARJnzInstruction = 5, + RARIncInstruction = 6, + RARDecInstruction = 7, + RARJmpInstruction = 8, + RARXorInstruction = 9, + RARAndInstruction = 10, + RAROrInstruction = 11, + RARTestInstruction = 12, + RARJsInstruction = 13, + RARJnsInstruction = 14, + RARJbInstruction = 15, + RARJbeInstruction = 16, + RARJaInstruction = 17, + RARJaeInstruction = 18, + RARPushInstruction = 19, + RARPopInstruction = 20, + RARCallInstruction = 21, + RARRetInstruction = 22, + RARNotInstruction = 23, + RARShlInstruction = 24, + RARShrInstruction = 25, + RARSarInstruction = 26, + RARNegInstruction = 27, + RARPushaInstruction = 28, +
RARPopaInstruction = 29, + RARPushfInstruction = 30, + RARPopfInstruction = 31, + RARMovzxInstruction = 32, + RARMovsxInstruction = 33, + RARXchgInstruction = 34, + RARMulInstruction = 35, + RARDivInstruction = 36, + RARAdcInstruction = 37, + RARSbbInstruction = 38, + RARPrintInstruction = 39, + RARNumberOfInstructions = 40, +}; + +RARProgram *RARCreateProgram(void); +void RARDeleteProgram(RARProgram *prog); +bool RARProgramAddInstr(RARProgram *prog, uint8_t instruction, bool bytemode); +bool RARSetLastInstrOperands(RARProgram *prog, uint8_t addressingmode1, uint32_t value1, uint8_t addressingmode2, uint32_t value2); +bool RARIsProgramTerminated(RARProgram *prog); + +/* Execution */ + +bool RARExecuteProgram(RARVirtualMachine *vm, RARProgram *prog); + +/* Memory and register access (convenience) */ + +void RARSetVirtualMachineRegisters(RARVirtualMachine *vm, uint32_t registers[8]); +uint32_t RARVirtualMachineRead32(RARVirtualMachine *vm, uint32_t address); +void RARVirtualMachineWrite32(RARVirtualMachine *vm, uint32_t address, uint32_t val); +uint8_t RARVirtualMachineRead8(RARVirtualMachine *vm, uint32_t address); +void RARVirtualMachineWrite8(RARVirtualMachine *vm, uint32_t address, uint8_t val); + +/* Instruction properties */ + +int NumberOfRARInstructionOperands(uint8_t instruction); +bool RARInstructionHasByteMode(uint8_t instruction); +bool RARInstructionIsUnconditionalJump(uint8_t instruction); +bool RARInstructionIsRelativeJump(uint8_t instruction); +bool RARInstructionWritesFirstOperand(uint8_t instruction); +bool RARInstructionWritesSecondOperand(uint8_t instruction); + +/* Program debugging */ + +#ifndef NDEBUG +void RARPrintProgram(RARProgram *prog); +#endif + +#endif diff --git a/cut-n-paste/unarr/rar/uncompress-rar.c b/cut-n-paste/unarr/rar/uncompress-rar.c new file mode 100644 index 00000000..53449ba3 --- /dev/null +++ b/cut-n-paste/unarr/rar/uncompress-rar.c @@ -0,0 +1,1000 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR30Handle.m */ +/* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRAR20Handle.m */ + +#include "rar.h" + +static void *gSzAlloc_Alloc(ISzAllocPtr p, size_t size) { return malloc(size); } +static void gSzAlloc_Free(ISzAllocPtr p, void *ptr) { free(ptr); } +static ISzAlloc gSzAlloc = { gSzAlloc_Alloc, gSzAlloc_Free }; + +static bool br_fill(ar_archive_rar *rar, int bits) +{ + uint8_t bytes[8]; + int count, i; + /* read as many bits as possible */ + count = (64 - rar->uncomp.br.available) / 8; + if (rar->progress.data_left < (size_t)count) + count = (int)rar->progress.data_left; + + if (bits > rar->uncomp.br.available + 8 * count || ar_read(rar->super.stream, bytes, count) != (size_t)count) { + if (!rar->uncomp.br.at_eof) { + warn("Unexpected EOF during decompression (truncated file?)"); + rar->uncomp.br.at_eof = true; + } + return false; + } + rar->progress.data_left -= count; + for (i = 0; i < count; i++) { + rar->uncomp.br.bits = (rar->uncomp.br.bits << 8) | bytes[i]; + } + rar->uncomp.br.available += 8 * count; + return true; +} + +static inline bool br_check(ar_archive_rar *rar, int bits) +{ + return bits <= rar->uncomp.br.available || br_fill(rar, bits); +} + +static inline uint64_t br_bits(ar_archive_rar *rar, int bits) +{ + return (rar->uncomp.br.bits >> (rar->uncomp.br.available -= bits)) & (((uint64_t)1 << bits) - 1); +} + +static Byte ByteIn_Read(const IByteIn *p) +{ + struct ByteReader *self = (struct ByteReader *) p; + return br_check(self->rar, 8) ? 
(Byte)br_bits(self->rar, 8) : 0xFF; +} + +static void ByteIn_CreateVTable(struct ByteReader *br, ar_archive_rar *rar) +{ + br->super.Read = ByteIn_Read; + br->rar = rar; +} + +/* Ppmd7 range decoder differs between 7z and RAR */ +static void PpmdRAR_RangeDec_Init(struct CPpmdRAR_RangeDec *p) +{ + int i; + p->Code = 0; + p->Low = 0; + p->Range = 0xFFFFFFFF; + for (i = 0; i < 4; i++) { + p->Code = (p->Code << 8) | p->Stream->Read(p->Stream); + } +} + +static void PpmdRAR_RangeDec_CreateVTable(struct CPpmdRAR_RangeDec *p, IByteIn *stream) +{ + p->Stream = stream; +} + +static bool rar_init_uncompress(struct ar_archive_rar_uncomp *uncomp, uint8_t version) +{ + /* per XADRARParser.m @handleForSolidStreamWithObject these versions are identical */ + if (version == 29 || version == 36) + version = 3; + else if (version == 20 || version == 26) + version = 2; + else { + warn("Unsupported compression version: %d", version); + return false; + } + if (uncomp->version) { + if (uncomp->version != version) { + warn("Compression version mismatch: %d != %d", version, uncomp->version); + return false; + } + return true; + } + memset(uncomp, 0, sizeof(*uncomp)); + uncomp->start_new_table = true; + if (!lzss_initialize(&uncomp->lzss, LZSS_WINDOW_SIZE)) { + warn("OOM during decompression"); + return false; + } + if (version == 3) { + uncomp->state.v3.ppmd_escape = 2; + uncomp->state.v3.filters.filterstart = SIZE_MAX; + } + uncomp->version = version; + return true; +} + +static void rar_free_codes(struct ar_archive_rar_uncomp *uncomp); + +void rar_clear_uncompress(struct ar_archive_rar_uncomp *uncomp) +{ + if (!uncomp->version) + return; + rar_free_codes(uncomp); + lzss_cleanup(&uncomp->lzss); + if (uncomp->version == 3) { + Ppmd7_Free(&uncomp->state.v3.ppmd7_context, &gSzAlloc); + rar_clear_filters(&uncomp->state.v3.filters); + } + uncomp->version = 0; +} + +static int rar_read_next_symbol(ar_archive_rar *rar, struct huffman_code *code) +{ + int node = 0; + + if (!code->table && 
!rar_make_table(code)) + return -1; + + /* performance optimization */ + if (code->tablesize <= rar->uncomp.br.available) { + uint16_t bits = (uint16_t)br_bits(rar, code->tablesize); + int length = code->table[bits].length; + int value = code->table[bits].value; + + if (length < 0) { + warn("Invalid data in bitstream"); /* invalid prefix code in bitstream */ + return -1; + } + if (length <= code->tablesize) { + /* Skip only length bits */ + rar->uncomp.br.available += code->tablesize - length; + return value; + } + + node = value; + } + + while (!rar_is_leaf_node(code, node)) { + uint8_t bit; + if (!br_check(rar, 1)) + return -1; + bit = (uint8_t)br_bits(rar, 1); + if (code->tree[node].branches[bit] < 0) { + warn("Invalid data in bitstream"); /* invalid prefix code in bitstream */ + return -1; + } + node = code->tree[node].branches[bit]; + } + + return code->tree[node].branches[0]; +} + +/***** RAR version 2 decompression *****/ + +static void rar_free_codes_v2(struct ar_archive_rar_uncomp_v2 *uncomp_v2) +{ + int i; + rar_free_code(&uncomp_v2->maincode); + rar_free_code(&uncomp_v2->offsetcode); + rar_free_code(&uncomp_v2->lengthcode); + for (i = 0; i < 4; i++) + rar_free_code(&uncomp_v2->audiocode[i]); +} + +static bool rar_parse_codes_v2(ar_archive_rar *rar) +{ + struct ar_archive_rar_uncomp_v2 *uncomp_v2 = &rar->uncomp.state.v2; + struct huffman_code precode; + uint8_t prelengths[19]; + uint16_t i, count; + int j, val, n; + bool ok = false; + + rar_free_codes_v2(uncomp_v2); + + if (!br_check(rar, 2)) + return false; + uncomp_v2->audioblock = br_bits(rar, 1) != 0; + if (!br_bits(rar, 1)) + memset(uncomp_v2->lengthtable, 0, sizeof(uncomp_v2->lengthtable)); + + if (uncomp_v2->audioblock) { + if (!br_check(rar, 2)) + return false; + uncomp_v2->numchannels = (uint8_t)br_bits(rar, 2) + 1; + count = uncomp_v2->numchannels * 257; + if (uncomp_v2->channel > uncomp_v2->numchannels) + uncomp_v2->channel = 0; + } + else + count = MAINCODE_SIZE_20 + OFFSETCODE_SIZE_20 + 
LENGTHCODE_SIZE_20; + + for (i = 0; i < 19; i++) { + if (!br_check(rar, 4)) + return false; + prelengths[i] = (uint8_t)br_bits(rar, 4); + } + + memset(&precode, 0, sizeof(precode)); + if (!rar_create_code(&precode, prelengths, 19)) + goto PrecodeError; + for (i = 0; i < count; ) { + val = rar_read_next_symbol(rar, &precode); + if (val < 0) + goto PrecodeError; + if (val < 16) { + uncomp_v2->lengthtable[i] = (uncomp_v2->lengthtable[i] + val) & 0x0F; + i++; + } + else if (val == 16) { + if (i == 0) { + warn("Invalid data in bitstream"); + goto PrecodeError; + } + if (!br_check(rar, 2)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 2) + 3; + for (j = 0; j < n && i < count; i++, j++) { + uncomp_v2->lengthtable[i] = uncomp_v2->lengthtable[i - 1]; + } + } + else { + if (val == 17) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < count; i++, j++) { + uncomp_v2->lengthtable[i] = 0; + } + } + } + ok = true; +PrecodeError: + rar_free_code(&precode); + if (!ok) + return false; + + if (uncomp_v2->audioblock) { + for (i = 0; i < uncomp_v2->numchannels; i++) { + if (!rar_create_code(&uncomp_v2->audiocode[i], uncomp_v2->lengthtable + i * 257, 257)) + return false; + } + } + else { + if (!rar_create_code(&uncomp_v2->maincode, uncomp_v2->lengthtable, MAINCODE_SIZE_20)) + return false; + if (!rar_create_code(&uncomp_v2->offsetcode, uncomp_v2->lengthtable + MAINCODE_SIZE_20, OFFSETCODE_SIZE_20)) + return false; + if (!rar_create_code(&uncomp_v2->lengthcode, uncomp_v2->lengthtable + MAINCODE_SIZE_20 + OFFSETCODE_SIZE_20, LENGTHCODE_SIZE_20)) + return false; + } + + rar->uncomp.start_new_table = false; + return true; +} + +static uint8_t rar_decode_audio(struct AudioState *state, int8_t *channeldelta, int8_t delta) +{ + uint8_t predbyte, byte; + int prederror; + + state->delta[3] = state->delta[2]; + state->delta[2] = 
state->delta[1]; + state->delta[1] = state->lastdelta - state->delta[0]; + state->delta[0] = state->lastdelta; + + predbyte = ((8 * state->lastbyte + state->weight[0] * state->delta[0] + state->weight[1] * state->delta[1] + state->weight[2] * state->delta[2] + state->weight[3] * state->delta[3] + state->weight[4] * *channeldelta) >> 3) & 0xFF; + byte = (predbyte - delta) & 0xFF; + + prederror = delta << 3; + state->error[0] += abs(prederror); + state->error[1] += abs(prederror - state->delta[0]); state->error[2] += abs(prederror + state->delta[0]); + state->error[3] += abs(prederror - state->delta[1]); state->error[4] += abs(prederror + state->delta[1]); + state->error[5] += abs(prederror - state->delta[2]); state->error[6] += abs(prederror + state->delta[2]); + state->error[7] += abs(prederror - state->delta[3]); state->error[8] += abs(prederror + state->delta[3]); + state->error[9] += abs(prederror - *channeldelta); state->error[10] += abs(prederror + *channeldelta); + + *channeldelta = state->lastdelta = (int8_t)(byte - state->lastbyte); + state->lastbyte = byte; + + if (!(++state->count & 0x1F)) { + uint8_t i, idx = 0; + for (i = 1; i < 11; i++) { + if (state->error[i] < state->error[idx]) + idx = i; + } + memset(state->error, 0, sizeof(state->error)); + + switch (idx) { + case 1: if (state->weight[0] >= -16) state->weight[0]--; break; + case 2: if (state->weight[0] < 16) state->weight[0]++; break; + case 3: if (state->weight[1] >= -16) state->weight[1]--; break; + case 4: if (state->weight[1] < 16) state->weight[1]++; break; + case 5: if (state->weight[2] >= -16) state->weight[2]--; break; + case 6: if (state->weight[2] < 16) state->weight[2]++; break; + case 7: if (state->weight[3] >= -16) state->weight[3]--; break; + case 8: if (state->weight[3] < 16) state->weight[3]++; break; + case 9: if (state->weight[4] >= -16) state->weight[4]--; break; + case 10: if (state->weight[4] < 16) state->weight[4]++; break; + } + } + + return byte; +} + +static int64_t 
rar_expand_v2(ar_archive_rar *rar, int64_t end) +{ + static const uint8_t lengthbases[] = + { 0, 1, 2, 3, 4, 5, 6, + 7, 8, 10, 12, 14, 16, 20, + 24, 28, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 160, 192, 224 }; + static const uint8_t lengthbits[] = + { 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 2, 2, + 2, 2, 3, 3, 3, 3, 4, + 4, 4, 4, 5, 5, 5, 5 }; + static const int32_t offsetbases[] = + { 0, 1, 2, 3, 4, 6, + 8, 12, 16, 24, 32, 48, + 64, 96, 128, 192, 256, 384, + 512, 768, 1024, 1536, 2048, 3072, + 4096, 6144, 8192, 12288, 16384, 24576, + 32768, 49152, 65536, 98304, 131072, 196608, + 262144, 327680, 393216, 458752, 524288, 589824, + 655360, 720896, 786432, 851968, 917504, 983040 }; + static const uint8_t offsetbits[] = + { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 }; + static const uint8_t shortbases[] = + { 0, 4, 8, 16, 32, 64, 128, 192 }; + static const uint8_t shortbits[] = + { 2, 2, 3, 4, 5, 6, 6, 6 }; + + struct ar_archive_rar_uncomp_v2 *uncomp_v2 = &rar->uncomp.state.v2; + LZSS *lzss = &rar->uncomp.lzss; + int symbol, offs, len; + + if ((uint64_t)end > rar->super.entry_size_uncompressed + rar->solid.size_total) + end = rar->super.entry_size_uncompressed + rar->solid.size_total; + + for (;;) { + if (lzss_position(lzss) >= end) + return end; + + if (uncomp_v2->audioblock) { + uint8_t byte; + symbol = rar_read_next_symbol(rar, &uncomp_v2->audiocode[uncomp_v2->channel]); + if (symbol < 0) + return -1; + if (symbol == 256) { + rar->uncomp.start_new_table = true; + return lzss_position(lzss); + } + byte = rar_decode_audio(&uncomp_v2->audiostate[uncomp_v2->channel], &uncomp_v2->channeldelta, (int8_t)(uint8_t)symbol); + uncomp_v2->channel++; + if (uncomp_v2->channel == uncomp_v2->numchannels) + uncomp_v2->channel = 0; + lzss_emit_literal(lzss, byte); + continue; + } + + symbol = rar_read_next_symbol(rar, &uncomp_v2->maincode); + if (symbol < 
0) + return -1; + if (symbol < 256) { + lzss_emit_literal(lzss, (uint8_t)symbol); + continue; + } + if (symbol == 256) { + offs = uncomp_v2->lastoffset; + len = uncomp_v2->lastlength; + } + else if (symbol <= 260) { + int idx = symbol - 256; + int lensymbol = rar_read_next_symbol(rar, &uncomp_v2->lengthcode); + offs = uncomp_v2->oldoffset[(uncomp_v2->oldoffsetindex - idx) & 0x03]; + if (lensymbol < 0 || lensymbol > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || lensymbol > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[lensymbol] + 2; + if (lengthbits[lensymbol] > 0) { + if (!br_check(rar, lengthbits[lensymbol])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[lensymbol]); + } + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + if (offs >= 0x101) + len++; + } + else if (symbol <= 268) { + int idx = symbol - 261; + offs = shortbases[idx] + 1; + if (shortbits[idx] > 0) { + if (!br_check(rar, shortbits[idx])) + return -1; + offs += (uint8_t)br_bits(rar, shortbits[idx]); + } + len = 2; + } + else if (symbol == 269) { + rar->uncomp.start_new_table = true; + return lzss_position(lzss); + } + else { + int idx = symbol - 270; + int offssymbol; + if (idx > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || idx > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[idx] + 3; + if (lengthbits[idx] > 0) { + if (!br_check(rar, lengthbits[idx])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[idx]); + } + offssymbol = rar_read_next_symbol(rar, &uncomp_v2->offsetcode); + if (offssymbol < 0 || offssymbol > (int)(sizeof(offsetbases) / sizeof(offsetbases[0])) || offssymbol > (int)(sizeof(offsetbits) / sizeof(offsetbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + offs = offsetbases[offssymbol] + 1; + if (offsetbits[offssymbol] > 0) { + if (!br_check(rar, 
offsetbits[offssymbol])) + return -1; + offs += (int)br_bits(rar, offsetbits[offssymbol]); + } + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + } + + uncomp_v2->lastoffset = uncomp_v2->oldoffset[uncomp_v2->oldoffsetindex++ & 0x03] = offs; + uncomp_v2->lastlength = len; + + lzss_emit_match(lzss, offs, len); + } +} + +/***** RAR version 3 decompression *****/ + +static void rar_free_codes(struct ar_archive_rar_uncomp *uncomp) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &uncomp->state.v3; + + if (uncomp->version == 2) { + rar_free_codes_v2(&uncomp->state.v2); + return; + } + + rar_free_code(&uncomp_v3->maincode); + rar_free_code(&uncomp_v3->offsetcode); + rar_free_code(&uncomp_v3->lowoffsetcode); + rar_free_code(&uncomp_v3->lengthcode); +} + +static bool rar_parse_codes(ar_archive_rar *rar) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + + if (rar->uncomp.version == 2) + return rar_parse_codes_v2(rar); + + rar_free_codes(&rar->uncomp); + + br_clear_leftover_bits(&rar->uncomp); + + if (!br_check(rar, 1)) + return false; + uncomp_v3->is_ppmd_block = br_bits(rar, 1) != 0; + if (uncomp_v3->is_ppmd_block) { + uint8_t ppmd_flags; + uint32_t max_alloc = 0; + + if (!br_check(rar, 7)) + return false; + ppmd_flags = (uint8_t)br_bits(rar, 7); + if ((ppmd_flags & 0x20)) { + if (!br_check(rar, 8)) + return false; + max_alloc = ((uint8_t)br_bits(rar, 8) + 1) << 20; + } + if ((ppmd_flags & 0x40)) { + if (!br_check(rar, 8)) + return false; + uncomp_v3->ppmd_escape = (uint8_t)br_bits(rar, 8); + } + if ((ppmd_flags & 0x20)) { + uint32_t maxorder = (ppmd_flags & 0x1F) + 1; + if (maxorder == 1) + return false; + if (maxorder > 16) + maxorder = 16 + (maxorder - 16) * 3; + + Ppmd7_Free(&uncomp_v3->ppmd7_context, &gSzAlloc); + Ppmd7_Construct(&uncomp_v3->ppmd7_context); + if (!Ppmd7_Alloc(&uncomp_v3->ppmd7_context, max_alloc, &gSzAlloc)) { + warn("OOM during decompression"); + return false; + } + ByteIn_CreateVTable(&uncomp_v3->bytein, rar); 
+ PpmdRAR_RangeDec_CreateVTable(&uncomp_v3->range_dec, &uncomp_v3->bytein.super); + PpmdRAR_RangeDec_Init(&uncomp_v3->range_dec); + Ppmd7_Init(&uncomp_v3->ppmd7_context, maxorder); + } + else { + if (!Ppmd7_WasAllocated(&uncomp_v3->ppmd7_context)) { + warn("Invalid data in bitstream"); /* invalid PPMd sequence */ + return false; + } + PpmdRAR_RangeDec_Init(&uncomp_v3->range_dec); + } + } + else { + struct huffman_code precode; + uint8_t bitlengths[20]; + uint8_t zerocount; + int i, j, val, n; + bool ok = false; + + if (!br_check(rar, 1)) + return false; + if (!br_bits(rar, 1)) + memset(uncomp_v3->lengthtable, 0, sizeof(uncomp_v3->lengthtable)); + memset(&bitlengths, 0, sizeof(bitlengths)); + for (i = 0; i < sizeof(bitlengths); i++) { + if (!br_check(rar, 4)) + return false; + bitlengths[i] = (uint8_t)br_bits(rar, 4); + if (bitlengths[i] == 0x0F) { + if (!br_check(rar, 4)) + return false; + zerocount = (uint8_t)br_bits(rar, 4); + if (zerocount) { + for (j = 0; j < zerocount + 2 && i < sizeof(bitlengths); j++) { + bitlengths[i++] = 0; + } + i--; + } + } + } + + memset(&precode, 0, sizeof(precode)); + if (!rar_create_code(&precode, bitlengths, sizeof(bitlengths))) + goto PrecodeError; + for (i = 0; i < HUFFMAN_TABLE_SIZE; ) { + val = rar_read_next_symbol(rar, &precode); + if (val < 0) + goto PrecodeError; + if (val < 16) { + uncomp_v3->lengthtable[i] = (uncomp_v3->lengthtable[i] + val) & 0x0F; + i++; + } + else if (val < 18) { + if (i == 0) { + warn("Invalid data in bitstream"); + goto PrecodeError; + } + if (val == 16) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < HUFFMAN_TABLE_SIZE; i++, j++) { + uncomp_v3->lengthtable[i] = uncomp_v3->lengthtable[i - 1]; + } + } + else { + if (val == 18) { + if (!br_check(rar, 3)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 3) + 3; + } + else { + if (!br_check(rar, 
7)) + goto PrecodeError; + n = (uint8_t)br_bits(rar, 7) + 11; + } + for (j = 0; j < n && i < HUFFMAN_TABLE_SIZE; i++, j++) { + uncomp_v3->lengthtable[i] = 0; + } + } + } + ok = true; +PrecodeError: + rar_free_code(&precode); + if (!ok) + return false; + + if (!rar_create_code(&uncomp_v3->maincode, uncomp_v3->lengthtable, MAINCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->offsetcode, uncomp_v3->lengthtable + MAINCODE_SIZE, OFFSETCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->lowoffsetcode, uncomp_v3->lengthtable + MAINCODE_SIZE + OFFSETCODE_SIZE, LOWOFFSETCODE_SIZE)) + return false; + if (!rar_create_code(&uncomp_v3->lengthcode, uncomp_v3->lengthtable + MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE, LENGTHCODE_SIZE)) + return false; + } + + rar->uncomp.start_new_table = false; + return true; +} + +static bool rar_read_filter(ar_archive_rar *rar, bool (* decode_byte)(ar_archive_rar *rar, uint8_t *byte), int64_t *end) +{ + uint8_t flags, val, *code; + uint16_t length, i; + + if (!decode_byte(rar, &flags)) + return false; + length = (flags & 0x07) + 1; + if (length == 7) { + if (!decode_byte(rar, &val)) + return false; + length = val + 7; + } + else if (length == 8) { + if (!decode_byte(rar, &val)) + return false; + length = val << 8; + if (!decode_byte(rar, &val)) + return false; + length |= val; + } + + code = malloc(length); + if (!code) { + warn("OOM during decompression"); + return false; + } + for (i = 0; i < length; i++) { + if (!decode_byte(rar, &code[i])) { + free(code); + return false; + } + } + if (!rar_parse_filter(rar, code, length, flags)) { + free(code); + return false; + } + free(code); + + if (rar->uncomp.state.v3.filters.filterstart < (size_t)*end) + *end = rar->uncomp.state.v3.filters.filterstart; + + return true; +} + +static inline bool rar_decode_ppmd7_symbol(struct ar_archive_rar_uncomp_v3 *uncomp_v3, Byte *symbol) +{ + int value = Ppmd7z_DecodeSymbol(&uncomp_v3->ppmd7_context); + if (value < 0) { + 
warn("Invalid data in bitstream"); /* invalid PPMd symbol */ + return false; + } + *symbol = (Byte)value; + return true; +} + +static bool rar_decode_byte(ar_archive_rar *rar, uint8_t *byte) +{ + if (!br_check(rar, 8)) + return false; + *byte = (uint8_t)br_bits(rar, 8); + return true; +} + +static bool rar_decode_ppmd7_byte(ar_archive_rar *rar, uint8_t *byte) +{ + return rar_decode_ppmd7_symbol(&rar->uncomp.state.v3, byte); +} + +static bool rar_handle_ppmd_sequence(ar_archive_rar *rar, int64_t *end) +{ + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + LZSS *lzss = &rar->uncomp.lzss; + Byte sym, code, length; + int lzss_offset; + + if (!rar_decode_ppmd7_symbol(uncomp_v3, &sym)) + return false; + if (sym != uncomp_v3->ppmd_escape) { + lzss_emit_literal(lzss, sym); + return true; + } + + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + switch (code) { + case 0: + return rar_parse_codes(rar); + + case 2: + rar->uncomp.start_new_table = true; + return true; + + case 3: + return rar_read_filter(rar, rar_decode_ppmd7_byte, end); + + case 4: + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset = code << 16; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset |= code << 8; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &code)) + return false; + lzss_offset |= code; + if (!rar_decode_ppmd7_symbol(uncomp_v3, &length)) + return false; + lzss_emit_match(lzss, lzss_offset + 2, length + 32); + return true; + + case 5: + if (!rar_decode_ppmd7_symbol(uncomp_v3, &length)) + return false; + lzss_emit_match(lzss, 1, length + 4); + return true; + + default: + lzss_emit_literal(lzss, sym); + return true; + } +} + +int64_t rar_expand(ar_archive_rar *rar, int64_t end) +{ + static const uint8_t lengthbases[] = + { 0, 1, 2, 3, 4, 5, 6, + 7, 8, 10, 12, 14, 16, 20, + 24, 28, 32, 40, 48, 56, 64, + 80, 96, 112, 128, 160, 192, 224 }; + static const uint8_t lengthbits[] = + { 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 
1, 1, 2, 2, + 2, 2, 3, 3, 3, 3, 4, + 4, 4, 4, 5, 5, 5, 5 }; + static const int32_t offsetbases[] = + { 0, 1, 2, 3, 4, 6, + 8, 12, 16, 24, 32, 48, + 64, 96, 128, 192, 256, 384, + 512, 768, 1024, 1536, 2048, 3072, + 4096, 6144, 8192, 12288, 16384, 24576, + 32768, 49152, 65536, 98304, 131072, 196608, + 262144, 327680, 393216, 458752, 524288, 589824, + 655360, 720896, 786432, 851968, 917504, 983040, + 1048576, 1310720, 1572864, 1835008, 2097152, 2359296, + 2621440, 2883584, 3145728, 3407872, 3670016, 3932160 }; + static const uint8_t offsetbits[] = + { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18 }; + static const uint8_t shortbases[] = + { 0, 4, 8, 16, 32, 64, 128, 192 }; + static const uint8_t shortbits[] = + { 2, 2, 3, 4, 5, 6, 6, 6 }; + + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = &rar->uncomp.state.v3; + LZSS *lzss = &rar->uncomp.lzss; + int symbol, offs, len, i; + + if (rar->uncomp.version == 2) + return rar_expand_v2(rar, end); + + for (;;) { + if (lzss_position(lzss) >= end) + return end; + + if (uncomp_v3->is_ppmd_block) { + if (!rar_handle_ppmd_sequence(rar, &end)) + return -1; + if (rar->uncomp.start_new_table) + return lzss_position(lzss); + continue; + } + + symbol = rar_read_next_symbol(rar, &uncomp_v3->maincode); + if (symbol < 0) + return -1; + if (symbol < 256) { + lzss_emit_literal(lzss, (uint8_t)symbol); + continue; + } + if (symbol == 256) { + if (!br_check(rar, 1)) + return -1; + if (!br_bits(rar, 1)) { + if (!br_check(rar, 1)) + return -1; + rar->uncomp.start_new_table = br_bits(rar, 1) != 0; + return lzss_position(lzss); + } + if (!rar_parse_codes(rar)) + return -1; + continue; + } + if (symbol == 257) { + if (!rar_read_filter(rar, rar_decode_byte, &end)) + return -1; + continue; + } + if (symbol == 258) { + if (uncomp_v3->lastlength == 0) + continue; + offs = 
uncomp_v3->lastoffset; + len = uncomp_v3->lastlength; + } + else if (symbol <= 262) { + int idx = symbol - 259; + int lensymbol = rar_read_next_symbol(rar, &uncomp_v3->lengthcode); + offs = uncomp_v3->oldoffset[idx]; + if (lensymbol < 0 || lensymbol > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || lensymbol > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[lensymbol] + 2; + if (lengthbits[lensymbol] > 0) { + if (!br_check(rar, lengthbits[lensymbol])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[lensymbol]); + } + for (i = idx; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + else if (symbol <= 270) { + int idx = symbol - 263; + offs = shortbases[idx] + 1; + if (shortbits[idx] > 0) { + if (!br_check(rar, shortbits[idx])) + return -1; + offs += (uint8_t)br_bits(rar, shortbits[idx]); + } + len = 2; + for (i = 3; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + else { + int idx = symbol - 271; + int offssymbol; + if (idx > (int)(sizeof(lengthbases) / sizeof(lengthbases[0])) || idx > (int)(sizeof(lengthbits) / sizeof(lengthbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + len = lengthbases[idx] + 3; + if (lengthbits[idx] > 0) { + if (!br_check(rar, lengthbits[idx])) + return -1; + len += (uint8_t)br_bits(rar, lengthbits[idx]); + } + offssymbol = rar_read_next_symbol(rar, &uncomp_v3->offsetcode); + if (offssymbol < 0 || offssymbol > (int)(sizeof(offsetbases) / sizeof(offsetbases[0])) || offssymbol > (int)(sizeof(offsetbits) / sizeof(offsetbits[0]))) { + warn("Invalid data in bitstream"); + return -1; + } + offs = offsetbases[offssymbol] + 1; + if (offsetbits[offssymbol] > 0) { + if (offssymbol > 9) { + if (offsetbits[offssymbol] > 4) { + if (!br_check(rar, offsetbits[offssymbol] - 4)) + return -1; + offs += (int)br_bits(rar, 
offsetbits[offssymbol] - 4) << 4; + } + if (uncomp_v3->numlowoffsetrepeats > 0) { + uncomp_v3->numlowoffsetrepeats--; + offs += uncomp_v3->lastlowoffset; + } + else { + int lowoffsetsymbol = rar_read_next_symbol(rar, &uncomp_v3->lowoffsetcode); + if (lowoffsetsymbol < 0) + return -1; + if (lowoffsetsymbol == 16) { + uncomp_v3->numlowoffsetrepeats = 15; + offs += uncomp_v3->lastlowoffset; + } + else { + offs += lowoffsetsymbol; + uncomp_v3->lastlowoffset = lowoffsetsymbol; + } + } + } + else { + if (!br_check(rar, offsetbits[offssymbol])) + return -1; + offs += (int)br_bits(rar, offsetbits[offssymbol]); + } + } + + if (offs >= 0x40000) + len++; + if (offs >= 0x2000) + len++; + + for (i = 3; i > 0; i--) + uncomp_v3->oldoffset[i] = uncomp_v3->oldoffset[i - 1]; + uncomp_v3->oldoffset[0] = offs; + } + + uncomp_v3->lastoffset = offs; + uncomp_v3->lastlength = len; + + lzss_emit_match(lzss, offs, len); + } +} + +bool rar_uncompress_part(ar_archive_rar *rar, void *buffer, size_t buffer_size) +{ + struct ar_archive_rar_uncomp *uncomp = &rar->uncomp; + struct ar_archive_rar_uncomp_v3 *uncomp_v3 = NULL; + size_t end; + + if (!rar_init_uncompress(uncomp, rar->entry.version)) + return false; + if (uncomp->version == 3) + uncomp_v3 = &uncomp->state.v3; + + for (;;) { + if (uncomp_v3 && uncomp_v3->filters.bytes_ready > 0) { + size_t count = smin(uncomp_v3->filters.bytes_ready, buffer_size); + memcpy(buffer, uncomp_v3->filters.bytes, count); + uncomp_v3->filters.bytes_ready -= count; + uncomp_v3->filters.bytes += count; + rar->progress.bytes_done += count; + buffer_size -= count; + buffer = (uint8_t *)buffer + count; + if (rar->progress.bytes_done == rar->super.entry_size_uncompressed) + goto FinishBlock; + } + else if (uncomp->bytes_ready > 0) { + int count = (int)smin(uncomp->bytes_ready, buffer_size); + lzss_copy_bytes_from_window(&uncomp->lzss, buffer, rar->progress.bytes_done + rar->solid.size_total, count); + uncomp->bytes_ready -= count; + rar->progress.bytes_done += count; 
+ buffer_size -= count; + buffer = (uint8_t *)buffer + count; + } + if (buffer_size == 0) + return true; + + if (uncomp->br.at_eof) + return false; + + if (uncomp_v3 && uncomp_v3->filters.lastend == uncomp_v3->filters.filterstart) { + if (!rar_run_filters(rar)) + return false; + continue; + } + +FinishBlock: + if (uncomp->start_new_table && !rar_parse_codes(rar)) + return false; + + end = rar->progress.bytes_done + rar->solid.size_total + LZSS_WINDOW_SIZE - LZSS_OVERFLOW_SIZE; + if (uncomp_v3 && uncomp_v3->filters.filterstart < end) + end = uncomp_v3->filters.filterstart; + end = (size_t)rar_expand(rar, end); + if (end == (size_t)-1 || end < rar->progress.bytes_done + rar->solid.size_total) + return false; + uncomp->bytes_ready = end - rar->progress.bytes_done - rar->solid.size_total; + if (uncomp_v3) + uncomp_v3->filters.lastend = end; + + if (uncomp_v3 && uncomp_v3->is_ppmd_block && uncomp->start_new_table) + goto FinishBlock; + } +} diff --git a/cut-n-paste/unarr/unarr.h b/cut-n-paste/unarr/unarr.h new file mode 100644 index 00000000..cf3538d8 --- /dev/null +++ b/cut-n-paste/unarr/unarr.h @@ -0,0 +1,103 @@ +/* Copyright 2015 the unarr project authors (see AUTHORS file). 
+ License: LGPLv3 */ + +#ifndef unarr_h +#define unarr_h + +#include +#include +#include +typedef int64_t off64_t; +typedef int64_t time64_t; + +#define UNARR_API_VERSION 100 + +typedef enum { + AR_ARCHIVE_ERROR_NONE, + AR_ARCHIVE_ERROR_UNKNOWN, + AR_ARCHIVE_ERROR_RAR5, + AR_ARCHIVE_ERROR_OLDRAR, + AR_ARCHIVE_ERROR_SFX +} ArArchiveError; + +/***** common/stream *****/ + +typedef struct ar_stream_s ar_stream; + +/* opens a read-only stream for the given file path; returns NULL on error */ +ar_stream *ar_open_file(const char *path); +#ifdef _WIN32 +ar_stream *ar_open_file_w(const wchar_t *path); +#endif +/* opens a read-only stream for the given chunk of memory; the pointer must be valid until ar_close is called */ +ar_stream *ar_open_memory(const void *data, size_t datalen); +#ifdef _WIN32 +typedef struct IStream IStream; +/* opens a read-only stream based on the given IStream */ +ar_stream *ar_open_istream(IStream *stream); +#endif + +/* closes the stream and releases underlying resources */ +void ar_close(ar_stream *stream); +/* tries to read 'count' bytes into buffer, advancing the read offset pointer; returns the actual number of bytes read */ +size_t ar_read(ar_stream *stream, void *buffer, size_t count); +/* moves the read offset pointer (same as fseek); returns false on failure */ +bool ar_seek(ar_stream *stream, off64_t offset, int origin); +/* shortcut for ar_seek(stream, count, SEEK_CUR); returns false on failure */ +bool ar_skip(ar_stream *stream, off64_t count); +/* returns the current read offset (or 0 on error) */ +off64_t ar_tell(ar_stream *stream); + +/***** common/unarr *****/ + +typedef struct ar_archive_s ar_archive; + +/* frees all data stored for the given archive; does not close the underlying stream */ +void ar_close_archive(ar_archive *ar); +/* reads the next archive entry; returns false on error or at the end of the file (use ar_at_eof to distinguish the two cases) */ +bool ar_parse_entry(ar_archive *ar); +/* reads the archive entry at the 
given offset as returned by ar_entry_get_offset (offset 0 always restarts at the first entry); should always succeed */ +bool ar_parse_entry_at(ar_archive *ar, off64_t offset); +/* reads the (first) archive entry associated with the given name; returns false if the entry couldn't be found */ +bool ar_parse_entry_for(ar_archive *ar, const char *entry_name); +/* returns whether the last ar_parse_entry call has reached the file's expected end */ +bool ar_at_eof(ar_archive *ar); + +/* returns the name of the current entry as UTF-8 string; this pointer is only valid until the next call to ar_parse_entry; returns NULL on failure */ +const char *ar_entry_get_name(ar_archive *ar); +/* returns the stream offset of the current entry for use with ar_parse_entry_at */ +off64_t ar_entry_get_offset(ar_archive *ar); +/* returns the total size of uncompressed data of the current entry; read exactly that many bytes using ar_entry_uncompress */ +size_t ar_entry_get_size(ar_archive *ar); +/* returns the stored modification date of the current entry in 100ns since 1601/01/01 */ +time64_t ar_entry_get_filetime(ar_archive *ar); +/* WARNING: don't manually seek in the stream between ar_parse_entry and the last corresponding ar_entry_uncompress call! 
*/ +/* uncompresses the next 'count' bytes of the current entry into buffer; returns false on error */ +bool ar_entry_uncompress(ar_archive *ar, void *buffer, size_t count); + +/* copies at most 'count' bytes of the archive's global comment (if any) into buffer; returns the actual amount of bytes copied (or, if 'buffer' is NULL, the required buffer size) */ +size_t ar_get_global_comment(ar_archive *ar, void *buffer, size_t count); + +/***** rar/rar *****/ + +/* checks whether 'stream' could contain RAR data and prepares for archive listing/extraction; returns NULL on failure */ +ar_archive *ar_open_rar_archive(ar_stream *stream); +ar_archive *ar_open_rar_archive_with_error(ar_stream *stream, ArArchiveError *error_code); + +/***** tar/tar *****/ + +/* checks whether 'stream' could contain TAR data and prepares for archive listing/extraction; returns NULL on failure */ +ar_archive *ar_open_tar_archive(ar_stream *stream); + +/***** zip/zip *****/ + +/* checks whether 'stream' could contain ZIP data and prepares for archive listing/extraction; returns NULL on failure */ +/* set deflatedonly for extracting XPS, EPUB, etc.
documents where non-Deflate compression methods are not supported by specification */ +ar_archive *ar_open_zip_archive(ar_stream *stream, bool deflatedonly); + +/***** _7z/_7z *****/ + +/* checks whether 'stream' could contain 7Z data and prepares for archive listing/extraction; returns NULL on failure */ +ar_archive *ar_open_7z_archive(ar_stream *stream); + +#endif diff --git a/po/POTFILES.skip b/po/POTFILES.skip index a96b4629..6d14dc5c 100644 --- a/po/POTFILES.skip +++ b/po/POTFILES.skip @@ -30,6 +30,7 @@ backend/tiff/tiffdocument.evince-backend.desktop.in backend/xps/evince-xpsdocument.metainfo.xml.in backend/xps/xpsdocument.evince-backend.desktop.in cut-n-paste/libdazzle/dzl-file-manager.c +cut-n-paste/unarr/common/conv.c data/org.gnome.Evince.desktop.in data/org.gnome.Evince-previewer.desktop.in data/org.gnome.evince.Daemon.service.in -- 2.35.1 From 692c16da2a6b4ed80c10ad0f5541fd2b47c36ec5 Mon Sep 17 00:00:00 2001 From: David King Date: Mon, 18 Apr 2022 08:23:59 +0100 Subject: [PATCH 2/2] Revert "comics: Use libarchive for RAR support" This reverts commit e25912b3a2fa91d8d05d0a683303a8d0a39541b5. 
--- backend/comics/ev-archive.c | 68 +++++++++++++++++++++++++++++++++---- backend/comics/ev-archive.h | 1 + backend/comics/meson.build | 1 + meson.build | 2 +- 4 files changed, 65 insertions(+), 7 deletions(-) diff --git a/backend/comics/ev-archive.c b/backend/comics/ev-archive.c index 568e1621..7159ca09 100644 --- a/backend/comics/ev-archive.c +++ b/backend/comics/ev-archive.c @@ -22,6 +22,7 @@ #include #include +#include #include #define BUFFER_SIZE (64 * 1024) @@ -33,6 +34,10 @@ struct _EvArchive { /* libarchive */ struct archive *libar; struct archive_entry *libar_entry; + + /* unarr */ + ar_stream *unarr_stream; + ar_archive *unarr; }; G_DEFINE_TYPE(EvArchive, ev_archive, G_TYPE_OBJECT); @@ -44,9 +49,13 @@ ev_archive_finalize (GObject *object) switch (archive->type) { case EV_ARCHIVE_TYPE_RAR: + g_clear_pointer (&archive->unarr, ar_close_archive); + g_clear_pointer (&archive->unarr_stream, ar_close); + break; case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_clear_pointer (&archive->libar, archive_free); break; default: @@ -83,10 +92,9 @@ libarchive_set_archive_type (EvArchive *archive, archive_read_support_format_7zip (archive->libar); else if (archive_type == EV_ARCHIVE_TYPE_TAR) archive_read_support_format_tar (archive->libar); - else if (archive_type == EV_ARCHIVE_TYPE_RAR) { - archive_read_support_format_rar (archive->libar); + else if (archive_type == EV_ARCHIVE_TYPE_RAR5) archive_read_support_format_rar5 (archive->libar); - } else + else g_assert_not_reached (); } @@ -107,9 +115,12 @@ ev_archive_set_archive_type (EvArchive *archive, switch (archive_type) { case EV_ARCHIVE_TYPE_RAR: + archive->type = archive_type; + break; case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: libarchive_set_archive_type (archive, archive_type); break; default: @@ -125,6 +136,7 @@ ev_archive_open_filename (EvArchive *archive, GError **error) { int r; + 
ArArchiveError code; g_return_val_if_fail (EV_IS_ARCHIVE (archive), FALSE); g_return_val_if_fail (archive->type != EV_ARCHIVE_TYPE_NONE, FALSE); @@ -134,9 +146,28 @@ ev_archive_open_filename (EvArchive *archive, case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); case EV_ARCHIVE_TYPE_RAR: + archive->unarr_stream = ar_open_file (path); + if (archive->unarr_stream == NULL) { + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED, + "Error opening archive"); + return FALSE; + } + archive->unarr = ar_open_rar_archive_with_error (archive->unarr_stream, &code); + if (archive->unarr == NULL) { + g_clear_pointer (&archive->unarr_stream, ar_close); + if (code == AR_ARCHIVE_ERROR_RAR5) { + libarchive_set_archive_type (archive, EV_ARCHIVE_TYPE_RAR5); + return ev_archive_open_filename (archive, path, error); + } + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED, + "Error opening RAR archive"); + return FALSE; + } + return TRUE; case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: r = archive_read_open_filename (archive->libar, path, BUFFER_SIZE); if (r != ARCHIVE_OK) { g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, @@ -189,9 +220,11 @@ ev_archive_read_next_header (EvArchive *archive, case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); case EV_ARCHIVE_TYPE_RAR: + return ar_parse_entry (archive->unarr); case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: return libarchive_read_next_header (archive, error); } @@ -217,9 +250,12 @@ ev_archive_get_entry_pathname (EvArchive *archive) case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); case EV_ARCHIVE_TYPE_RAR: + g_return_val_if_fail (archive->unarr != NULL, NULL); + return ar_entry_get_name (archive->unarr); case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_return_val_if_fail (archive->libar_entry != NULL, NULL); return archive_entry_pathname 
(archive->libar_entry); } @@ -234,12 +270,15 @@ ev_archive_get_entry_size (EvArchive *archive) g_return_val_if_fail (archive->type != EV_ARCHIVE_TYPE_NONE, -1); switch (archive->type) { + case EV_ARCHIVE_TYPE_RAR: + g_return_val_if_fail (archive->unarr != NULL, -1); + return ar_entry_get_size (archive->unarr); case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); - case EV_ARCHIVE_TYPE_RAR: case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_return_val_if_fail (archive->libar_entry != NULL, -1); return archive_entry_size (archive->libar_entry); } @@ -254,12 +293,16 @@ ev_archive_get_entry_is_encrypted (EvArchive *archive) g_return_val_if_fail (archive->type != EV_ARCHIVE_TYPE_NONE, FALSE); switch (archive->type) { + case EV_ARCHIVE_TYPE_RAR: + g_return_val_if_fail (archive->unarr != NULL, FALSE); + /* password-protected RAR is not even detected right now */ + return FALSE; case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); - case EV_ARCHIVE_TYPE_RAR: case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_return_val_if_fail (archive->libar_entry != NULL, -1); return archive_entry_is_encrypted (archive->libar_entry); } @@ -279,12 +322,21 @@ ev_archive_read_data (EvArchive *archive, g_return_val_if_fail (archive->type != EV_ARCHIVE_TYPE_NONE, -1); switch (archive->type) { + case EV_ARCHIVE_TYPE_RAR: + g_return_val_if_fail (archive->unarr != NULL, -1); + if (!ar_entry_uncompress (archive->unarr, buf, count)) { + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED, + "Failed to decompress RAR data"); + return -1; + } + r = count; + break; case EV_ARCHIVE_TYPE_NONE: g_assert_not_reached (); - case EV_ARCHIVE_TYPE_RAR: case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_return_val_if_fail (archive->libar_entry != NULL, -1); r = archive_read_data (archive->libar, buf, count); if (r < 0) { @@ -305,9 +357,13 
@@ ev_archive_reset (EvArchive *archive) switch (archive->type) { case EV_ARCHIVE_TYPE_RAR: + g_clear_pointer (&archive->unarr, ar_close_archive); + g_clear_pointer (&archive->unarr_stream, ar_close); + break; case EV_ARCHIVE_TYPE_ZIP: case EV_ARCHIVE_TYPE_7Z: case EV_ARCHIVE_TYPE_TAR: + case EV_ARCHIVE_TYPE_RAR5: g_clear_pointer (&archive->libar, archive_free); libarchive_set_archive_type (archive, archive->type); archive->libar_entry = NULL; diff --git a/backend/comics/ev-archive.h b/backend/comics/ev-archive.h index b4e1399c..c6af4fa4 100644 --- a/backend/comics/ev-archive.h +++ b/backend/comics/ev-archive.h @@ -29,6 +29,7 @@ G_DECLARE_FINAL_TYPE (EvArchive, ev_archive, EV, ARCHIVE, GObject) typedef enum { EV_ARCHIVE_TYPE_NONE = 0, EV_ARCHIVE_TYPE_RAR, + EV_ARCHIVE_TYPE_RAR5, EV_ARCHIVE_TYPE_ZIP, EV_ARCHIVE_TYPE_7Z, EV_ARCHIVE_TYPE_TAR diff --git a/backend/comics/meson.build b/backend/comics/meson.build index c245b1ba..1e740194 100644 --- a/backend/comics/meson.build +++ b/backend/comics/meson.build @@ -7,6 +7,7 @@ incs = backends_incs + [cut_n_paste_inc] deps = backends_deps + [ libarchive_dep, + libunarr_dep, ] shared_module( diff --git a/meson.build b/meson.build index bcc54b2e..3a92db30 100644 --- a/meson.build +++ b/meson.build @@ -349,7 +349,7 @@ else endif # *** Comic Book *** -libarchive_req_version = '>= 3.6.0' +libarchive_req_version = '>= 3.2.0' libarchive_dep = dependency('libarchive', version: libarchive_req_version, required: get_option('comics')) enable_comics = libarchive_dep.found() if enable_comics -- 2.35.1