Package with pre-generated sources

Instead of carrying a bundle of NodeJS “dev” dependencies and building
sources in the RPM build environment, pre-generate the API header and
C library source file from the contents of the primary source archive,
and place them in a second source archive.

See comments in the spec file for rationale.
This commit is contained in:
Benjamin A. Beasley 2021-12-10 17:27:54 -05:00
parent f460573ec4
commit f19487151d
7 changed files with 100 additions and 409 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/llhttp-6.0.6-nm-dev.tgz
/llhttp-6.0.6.tar.gz
/llhttp-6.0.6-generated.tar.xz

View File

@ -1,31 +0,0 @@
[any]
[prod]
[dev]
# Just a module wrapper around the code in tslib, which does have a proper
# license in its package.json:
# tslib/modules
modules = "<unknown version>"
# A “dummy” module in the tests for tslib
# tslib/test/validateModuleExportsMatchCommonJS
validateModuleExportsMatchCommonJS = "<unknown version>"
# These are all “dummy” modules in the tests for resolve:
# resolve/test/module_dir/zmodules/bbb
bbb = "<unknown version>"
# resolve/test/resolver/invalid_main
"invalid main" = "<unknown version>"
# resolve/test/resolver/incorrect_main
incorrect_main = "<unknown version>"
# resolve/test/resolver/dot_slash_main
dot_slash_main = "<unknown version>"
# resolve/test/resolver/dot_main
dot_main = "<unknown version>"
# resolve/test/resolver/baz
baz = "<unknown version>"
# resolve/test/resolver/browser_field
browser_field = "<unknown version>"
# resolve/test/resolver/symlinked/package
package = "<unknown version>"

View File

@ -1,191 +0,0 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import json
from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter
from pathlib import Path
from sys import exit, stderr
import toml
def _describe_license_problem(declared, identity):
    """Return a diagnostic for an unacceptable license value, else None.

    *declared* is the raw ``license``/``licenses`` value taken from a
    package.json; *identity* is the "name version" label used in messages.
    A plain string, an object with a string ``type``, or a non-empty array of
    such objects is accepted.
    """
    if declared is None:
        return f"Missing license in package.json for {identity}"
    if isinstance(declared, str):
        return None
    if isinstance(declared, dict):
        if isinstance(declared.get("type"), str):
            return None
        return (
            "Missing type for (deprecated) license object in "
            f"package.json for {identity}: {declared}"
        )
    if isinstance(declared, list):
        well_formed = bool(declared) and all(
            isinstance(entry, dict) and isinstance(entry.get("type"), str)
            for entry in declared
        )
        if well_formed:
            return None
        return (
            "Defective (deprecated) licenses array-of objects in "
            f"package.json for {identity}: {declared}"
        )
    return (
        "Weird type for license in "
        f"package.json for {identity}: {declared}"
    )


def main():
    """Audit every package.json under the requested tree for license data.

    Returns an error string (which becomes a nonzero exit status when passed
    to ``exit``) if the tree is not a directory or if any package that was not
    manually excepted has a missing or malformed license; returns None
    otherwise. Each offending package is reported on stderr.
    """
    args = parse_args()
    if not args.tree.is_dir():
        return f"Not a directory: {args.tree}"

    found_problem = False
    for package_json in args.tree.glob("**/package.json"):
        name, version, declared = parse(package_json)
        # Manually audited versions are skipped without looking at the license.
        if version in args.exceptions.get(name, ()):
            continue
        complaint = _describe_license_problem(declared, f"{name} {version}")
        if complaint is not None:
            print(complaint, file=stderr)
            found_problem = True

    if found_problem:
        return "At least one missing license was found."
    return None
def check_exception(exceptions, name, version):
    """Return True if *version* of package *name* was manually audited.

    *exceptions* maps package names to collections of audited version
    strings (the merged mapping produced by ``parse_args``). A package that
    does not appear in the mapping has no audited versions.

    The previous body read an undefined global ``args`` and ignored its
    parameters, so any call raised NameError.
    """
    return version in exceptions.get(name, ())
def parse(package_json_path):
    """Read one package.json and return ``(name, version, license)``.

    The license is the ``license`` member when that key is present (even if
    its value is null); otherwise it is the deprecated ``licenses`` member, or
    None when neither exists. A missing ``name`` falls back to the name of the
    directory containing the file, and a missing ``version`` is reported as
    ``"<unknown version>"``.
    """
    manifest = json.loads(package_json_path.read_bytes())

    try:
        declared = manifest["license"]
    except KeyError:
        declared = manifest.get("licenses")

    package_name = manifest.get("name", package_json_path.parent.name)
    package_version = manifest.get("version", "<unknown version>")
    return package_name, package_version, declared
def _merge_versions(merged, table):
    """Add every package/version pair from *table* into *merged* in place.

    Values in *table* may be a single version string or a list of them;
    *merged* always maps a package name to a list without duplicates.
    """
    for package, versions in table.items():
        if isinstance(versions, str):
            versions = [versions]
        bucket = merged.setdefault(package, [])
        for version in versions:
            if version not in bucket:
                bucket.append(version)


def parse_args():
    """Parse the command line and load the audited-exceptions file.

    Returns the argparse namespace with:

    - ``tree``: a ``Path`` to search (defaults to the current directory);
    - ``exceptions``: one dict mapping package name to a list of audited
      version strings, built from the ``any`` table plus every table enabled
      with ``--with``.

    Any problem with the exceptions file, or a ``--with`` table that does not
    exist, is reported through ``parser.error`` (which exits the program).
    """
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=(
            "Search for bundled dependencies without declared licenses"
        ),
        epilog="""
The exceptions file must be a TOML file with zero or more tables. Each tables
keys are package names; the corresponding values values are exact version
number strings, or arrays of version number strings, that have been manually
audited to determine their license status and should therefore be ignored.
Exceptions in a table called “any” are always applied. Otherwise, exceptions
are applied only if a corresponding --with TABLENAME argument is given;
multiple such arguments may be given.
For
example:
[any]
example-foo = "1.0.0"
[prod]
example-bar = [ "2.0.0", "2.0.1",]
[dev]
example-bat = [ "3.7.4",]
would always ignore version 1.0.0 of example-foo. It would ignore example-bar
2.0.1 only when called with “--with prod”.
Comments may (and should) be used to describe the manual audits upon which the
exclusions are based.
Otherwise, any package.json with missing or null license field in the tree is
considered an error, and the program returns with nonzero status.
""",
    )
    parser.add_argument(
        "-x",
        "--exceptions",
        type=FileType("r"),
        help="Manually audited package versions file",
    )
    parser.add_argument(
        "-w",
        "--with",
        action="append",
        default=[],
        help="Enable a table in the exceptions file",
    )
    parser.add_argument(
        "tree",
        metavar="node_modules_dir",
        type=Path,
        # Without nargs="?" argparse treats the positional as required and
        # silently ignores the default.
        nargs="?",
        help="Path to search recursively",
        default=Path("."),
    )
    args = parser.parse_args()

    if args.exceptions is None:
        tables = {}
        xname = None
    else:
        with args.exceptions as xfile:
            xname = getattr(xfile, "name", "<exceptions>")
            tables = toml.load(xfile)
    if not isinstance(tables, dict):
        parser.error(f"Invalid format in {xname}: not an object")

    # Validate the file: every top-level entry is a table, and every value in
    # a table is a version string or a list of version strings.
    for tablename, table in tables.items():
        if not isinstance(table, dict):
            parser.error(
                f"Non-table entry in {xname}: {tablename} = {table!r}"
            )
        for key, value in table.items():
            if isinstance(value, str):
                continue
            if not isinstance(value, list) or not all(
                isinstance(entry, str) for entry in value
            ):
                parser.error(
                    f"Invalid format in {xname} in [{tablename}]: "
                    f"{key!r} = {value!r}"
                )

    # Start from the always-applied "any" table and *add* the versions from
    # each enabled table. Replacing per-package lists (as dict.update would)
    # drops "any" exceptions for packages that another table also mentions.
    merged = {}
    _merge_versions(merged, tables.get("any", {}))
    for add in getattr(args, "with"):
        try:
            table = tables[add]
        except KeyError:
            if xname is None:
                parser.error(
                    f"No table {add}, as no exceptions file was given"
                )
            else:
                parser.error(f"No table {add} in {xname}")
        _merge_versions(merged, table)

    # Store the merged dictionary
    args.exceptions = merged
    return args
# Script entry point: main() returns None on success or an error message,
# which SystemExit turns into exit status 0 or a stderr message plus status 1.
if __name__ == "__main__":
    raise SystemExit(main())

57
generate-source1 Executable file
View File

@ -0,0 +1,57 @@
#!/bin/sh
# Treat expansion of an unset variable as an error, and stop at the first
# command that exits with a nonzero status.
set -o nounset
set -o errexit
# Print the help text to standard output. ${0} is expanded inside the
# here-document, so the message shows the name the script was invoked with.
usage() {
cat <<EOF
Usage: ${0} SOURCE0
Given the path to the primary source archive:
- Extract it to a temporary directory
- Use "npm install" in the temporary directory, downloading a large number of
dependencies for generating C code from TypeScript sources
- Do C code generation within the NodeJS ecosystem
Package the generated code (only) in an archive suitable for overlaying on the
primary source archive, and copy it to the current working directory.
EOF
}
# Exactly one argument is accepted: a help flag or the path to SOURCE0.
if [ "$#" != '1' ]
then
  # A wrong argument count is an error, so the usage text is a diagnostic and
  # goes to stderr; stdout stays clean for callers that capture it.
  usage 1>&2
  exit 1
elif [ "${1}" = '-h' ] || [ "${1}" = '--help' ]
then
  # Explicitly requested help is normal output and a successful exit.
  usage
  exit 0
fi
# Path to the primary source archive, as given on the command line.
SOURCE0="${1}"
# Remember where we were started; the finished archive is copied back here.
RUNDIR="${PWD}"
# Archive file name with directory and the '.tar.gz' suffix removed.
ARCHBASE="$(basename "${SOURCE0}" '.tar.gz')"
# All work happens in a scratch directory that is removed when the script
# exits or receives INT/TERM. The trap string is double-quoted, so the value
# of TMP_DIR is substituted now, when the trap is installed.
TMP_DIR="$(mktemp -d)"
trap "cd /; rm -rf '${TMP_DIR}'" INT TERM EXIT
# Copy the archive (preserving mode and timestamps) and work on the copy.
cp -p "${SOURCE0}" "${TMP_DIR}"
cd "${TMP_DIR}"
# Print a progress banner for the step named in $1 on stderr.
status() {
echo "=== ${1} ===" 1>&2
}
status "Extracting ${ARCHBASE}"
# The copy in TMP_DIR is addressed by its reconstructed name, so SOURCE0 is
# expected to end in '.tar.gz'.
tar -xzf "${ARCHBASE}.tar.gz"
# Take the first directory found at the top level of TMP_DIR as the extracted
# source tree (the scratch directory holds nothing else but the archive).
XDIR="$(find . -mindepth 1 -maxdepth 1 -type d -print -quit)"
cd "${XDIR}"
status 'Downloading code-generation machinery from NPM'
npm install
status 'Generating C sources'
make generate
# Return to the scratch root so archive member paths start with ${XDIR}.
cd "${TMP_DIR}"
status 'Archiving C sources'
GENARCHBASE="${ARCHBASE}-generated"
# Only the build/ directory of the source tree goes into the archive.
tar -cvf "${GENARCHBASE}.tar" "${XDIR}/build"
# Compress with xz preset 9 plus the "extreme" flag; the result is the
# .tar.xz file copied below.
xz -9e "${GENARCHBASE}.tar"
cp -vp "${GENARCHBASE}.tar.xz" "${RUNDIR}"

View File

@ -1,109 +0,0 @@
#!/bin/bash
# Treat expansion of an unset variable as an error, and stop at the first
# command that exits with a nonzero status.
set -o nounset
set -o errexit
# Destination for the results: whatever rpm expands %{_sourcedir} to.
OUTPUT_DIR="$(rpm -E '%{_sourcedir}')"
# The spec file is looked up in the directory the script is started from.
SPEC_FILE="${PWD}/llhttp.spec"
# Print the help text to stderr and terminate the script with status 1.
usage() {
cat 1>&2 <<EOF
Usage: $(basename "$0")
Given llhttp.spec in the working directory, download the source and the prod
and dev dependencies, each in their own tarball.
Also finds licenses for prod dependencies.
All three tarballs and the license list are copied to
${OUTPUT_DIR}.
EOF
exit 1
}
# npm performs every dependency download below. Look it up on PATH rather
# than testing for /usr/bin/npm, so that an npm installed in another location
# is accepted too.
if ! command -v npm >/dev/null 2>&1
then
  cat 1>&2 <<EOF
$(basename "${0}") requires npm to run
Run the following to fix this:
sudo dnf install npm
EOF
  exit 2
fi
# The script takes no arguments; anything on the command line is a usage
# error (usage prints the help text and exits with status 1).
if [[ $# -gt 0 ]]; then
  usage
fi
# Work in a scratch directory that is removed when the script exits or
# receives INT/TERM. The trap string is double-quoted, so the value of
# TMP_DIR is substituted now, when the trap is installed.
TMP_DIR="$(mktemp -d -t ci-XXXXXXXXXX)"
trap "cd /; rm -rf '${TMP_DIR}'" INT TERM EXIT
cd "${TMP_DIR}"
echo "Reading ${SPEC_FILE}; downloading source archive" 1>&2
# Take the second field of the first line whose first field is "Version:".
VERSION="$(awk '$1 == "Version:" { print $2; exit }' "${SPEC_FILE}")"
echo "Version is ${VERSION}" 1>&2
echo "Downloading source archive" 1>&2
spectool -g "${SPEC_FILE}"
# The first *.tar.gz file found at the top level of the scratch directory is
# taken to be the archive that was just downloaded.
ARCHIVE="$(
find . -mindepth 1 -maxdepth 1 -type f -name '*.tar.gz' -print -quit
)"
echo "Downloaded $(basename "${ARCHIVE}")" 1>&2
tar -xzf "${ARCHIVE}"
# Likewise, the first top-level directory is taken to be the extracted tree.
XDIR="$(find . -mindepth 1 -maxdepth 1 -type d -print -quit)"
echo "Extracted to $(basename "${XDIR}")" 1>&2
cd "${XDIR}"
echo "Downloading prod dependencies" 1>&2
# Compared to nodejs-packaging-bundler, we must add --ignore-scripts or npm
# unsuccessfully attempts to build the package.
npm install --no-optional --only=prod --ignore-scripts
echo "Successful prod dependencies download" 1>&2
# Set the result aside under its own name; the dev install later in the script
# creates a fresh node_modules/.
mv node_modules/ node_modules_prod
echo "LICENSES IN BUNDLE:"
LICENSE_FILE="${TMP_DIR}/llhttp-${VERSION}-bundled-licenses.txt"
# First pass: append every .license value that is a plain string.
find . -name 'package.json' -exec jq '.license | strings' '{}' ';' \
>> "${LICENSE_FILE}"
# Second pass: the deprecated object form (.license.type) and array form
# (.licenses[].type). jq errors from files lacking these are discarded.
for what in '.license | objects | .type' '.licenses[] .type'
do
find . -name 'package.json' -exec jq "${what}" '{}' ';' \
>> "${LICENSE_FILE}" 2>/dev/null
done
# Sort the collected list and drop duplicates, rewriting the file in place.
sort -u -o "${LICENSE_FILE}" "${LICENSE_FILE}"
# Locate any dependencies without a provided license
find . -type f -name 'package.json' -execdir jq \
'if .license==null and .licenses==null then .name else null end' '{}' '+' |
grep -vE '^null$' |
sort -u > "${TMP_DIR}/nolicense.txt"
# A non-empty result only produces a highlighted warning on stdout; the
# script carries on regardless.
if [[ -s "${TMP_DIR}/nolicense.txt" ]]
then
echo -e "\e[5m\e[41mSome dependencies do not list a license. Manual verification required!\e[0m"
cat "${TMP_DIR}/nolicense.txt"
echo -e "\e[5m\e[41m======================================================================\e[0m"
fi
echo "Downloading dev dependencies" 1>&2
# Compared to nodejs-packaging-bundler, we must add --ignore-scripts or npm
# unsuccessfully attempts to build the package.
npm install --no-optional --only=dev --ignore-scripts
echo "Successful dev dependencies download" 1>&2
mv node_modules/ node_modules_dev
# Write each bundle that exists as a tarball one level up, next to the
# downloaded source archive in the scratch directory.
if [[ -d node_modules_prod ]]
then
tar -czf "../llhttp-${VERSION}-nm-prod.tgz" node_modules_prod
fi
if [[ -d node_modules_dev ]]
then
tar -czf "../llhttp-${VERSION}-nm-dev.tgz" node_modules_dev
fi
cd ..
# Copy the source archive and every llhttp-${VERSION}* file (the bundles and
# the license list) to the output directory.
find . -mindepth 1 -maxdepth 1 -type f \( -name "$(basename "${ARCHIVE}")" \
-o -name "llhttp-${VERSION}*" \) -exec cp -vp '{}' "${OUTPUT_DIR}" ';'

View File

@ -4,19 +4,37 @@
# IR, using llparse (https://github.com/nodejs/llparse)—all of which happens
# within the NodeJS ecosystem.
#
# The package therefore “builds like” a NodeJS package, and to the extent they
# are relevant we apply the NodeJS packaging guidelines. However, the result of
# the build “installs like” a traditional C library package and has no NodeJS
# dependencies, including bundled ones.
# There are two approaches that could potentially work here:
# 1) Apply the Node.js packaging guidelines, form a “dev” dependency
# bundle, using something resembling the nodejs-packaging-bundler script.
# (Note that the package *is* registered with npm as “llhttp”, although
# current releases are not published there.) Use the resulting bundle to
# generate the C sources in the RPM build environment.
# 2) Applying
# https://docs.fedoraproject.org/en-US/packaging-guidelines/what-can-be-packaged/#_pregenerated_code,
# consider the C sources to be pre-generated code that is generated with
# tools (ts-node plus an array of NodeJS packages) that are not included in
# Fedora. This is not preferred, but is allowable as long as the original
# sources (i.e., the TypeScript sources in Source0) are present. Generate
# them once as part of the packaging process using a temporary “npm
# install”ed toolchain.
#
# Furthermore, the package is registered with npm as “llhttp”, but current
# releases are not published there, so we use the GitHub archive as the
# canonical source and use a custom bundler script based on
# nodejs-packaging-bundler to fetch NodeJS build dependencies.
# We choose the second approach. It has a couple of advantages:
#
# Overall, we cherry-pick from the standard and NodeJS packaging guidelines as
# each seems to best apply, understanding that this package does not fit well
# into any of the usual patterns or templates.
# - It keeps this package from being dragged into ongoing community concerns
# about otherwise-impermissible content in NPM dependency bundles generated
# according to NodeJS packaging guidelines (such as generated code without
# all accompanying sources, pre-minified or bundled web assets, or bundled
# fonts).
# - It gives us the possibility of building the library on platforms not
# supported by NodeJS, although no Fedora primary architectures currently
# fall into that category.
#
# It also has one big disadvantage:
#
# - Without a big bundle of NodeJS “dev” dependencies, we cannot run upstream’s
# test suite—which, granted, seems to operate on a JavaScript realization of
# the library rather than on the C library we have compiled.
# Upstream has been asked to provide a proper .so version:
# https://github.com/nodejs/llhttp/issues/140
@ -28,38 +46,15 @@ Version: 6.0.6
Release: %autorelease
Summary: Port of http_parser to llparse
# License of llhttp is MIT; nothing from the NodeJS dependency bundle is
# installed, so its contents do not contribute to the license of the binary
# RPMs, and we do not need a file llhttp-%%{version}-bundled-licenses.txt.
License: MIT
%global forgeurl https://github.com/nodejs/llhttp
%forgemeta
URL: %{forgeurl}
Source0: %{forgesource}
# Based closely on nodejs-packaging-bundler, except:
#
# - The GitHub source tarball specified in this spec file is used since the
# current version is not typically published on npm
# - No production dependency bundle is generated, since none is needed—and
# therefore, no bundled licenses text file is generated either
Source1: llhttp-packaging-bundler
# Created with llhttp-packaging-bundler (Source1):
Source2: llhttp-%{version}-nm-dev.tgz
# While nothing in the dev bundle is installed, we still choose to audit for
# null licenses at build time and to keep manually-approved exceptions in a
# file.
Source3: check-null-licenses
Source4: audited-null-licenses.toml
# The compiled RPM does not depend on NodeJS at all, but we cannot *build* it
# on architectures without NodeJS.
ExclusiveArch: %{nodejs_arches}
# For generating the C source “release” from TypeScript:
BuildRequires: nodejs-devel
BuildRequires: make
# Generated from Source0 using Source2:
# ./generate-source1 ${SOURCE0}
Source1: %{archivename}-generated.tar.xz
Source2: generate-source1
# For compiling the C library
BuildRequires: cmake
@ -68,10 +63,6 @@ BuildRequires: gcc
# For tests
BuildRequires: clang
# For check-null-licenses
BuildRequires: python3-devel
BuildRequires: python3dist(toml)
%description
This project is a port of http_parser to TypeScript. llparse is used to
generate the output C source file, which could be compiled and linked with the
@ -91,24 +82,15 @@ developing applications that use llhttp.
%prep
%forgeautosetup
# Set up bundled (dev) node modules required to generate the C sources from the
# TypeScript sources.
tar -xzf '%{SOURCE2}'
mkdir -p node_modules
pushd node_modules
ln -s ../node_modules_dev/* .
ln -s ../node_modules_dev/.bin .
popd
# We run ts-node out of node_modules/.bin rather than using npx (which we will
# not have available).
sed -r -i 's@\bnpx[[:blank:]](ts-node)\b@node_modules/.bin/\1@' Makefile
# Overlay the generated sources in build/
%setup -q -T -D -b 1 -n %{extractdir}
# When making the “release” distribution, don’t try to use npm since we already
# have matching generated sources.
sed -r -i 's/(release: )generate/\1/' Makefile
%build
# Generate the C source “release” from TypeScript using the “node_modules_dev”
# bundle.
# Generate the C source “release” using the pre-generated sources.
%make_build release
# Apply downstream .so versioning
cat >> release/CMakeLists.txt <<'EOF'
@ -120,10 +102,6 @@ EOF
sed -r -i 's@\b(DESTINATION[[:blank:]]+)lib($|/)@\1%{_libdir}\2@' \
release/CMakeLists.txt
# To help prove that nothing from the bundled NodeJS dependencies is included
# in the binary packages, remove the “node_modules” symlinks.
rm -rvf node_modules
cd release
%cmake -DBUILD_SHARED_LIBS:BOOL=ON
%cmake_build
@ -141,22 +119,8 @@ fi
%check
# Symlink the NodeJS bundle again so that we can test with Mocha
mkdir -p node_modules
pushd node_modules
ln -s ../node_modules_dev/* .
ln -s ../node_modules_dev/.bin .
popd
# Verify that no bundled dev dependency has a null license field, unless we
# already audited it by hand. This reduces the chance of accidentally including
# code with license problems in the source RPM.
%{python3} '%{SOURCE3}' --exceptions '%{SOURCE4}' --with dev node_modules_dev
# See scripts.mocha in package.json:
NODE_ENV=test ./node_modules/.bin/mocha \
-r ts-node/register/type-check \
test/*-test.ts
# There are no upstream tests that run directly against the C library, only a
# Mocha-based test suite that operates within NodeJS.
%files

View File

@ -1,2 +1,2 @@
SHA512 (llhttp-6.0.6-nm-dev.tgz) = ea8905b57f51ad2d870d17dc579ec5fe2175b3bb898f304af4f1e3bd52782488dfb9bb38281f1a826d1745fa608e0200e52239ea5bd525392ad7150461d03448
SHA512 (llhttp-6.0.6.tar.gz) = 6d621aafcf8b0fcddfb8ceb04b69caa4c79f4b955c9548ee8616290a538fcbdd3b2f1f1d35c6609e03d49de01db2b771a60e38fd7f277dd89b5f1a0abc0c31ae
SHA512 (llhttp-6.0.6-generated.tar.xz) = 845a7b159e2891f4ab58546a8cfbb4c943cdf2d8490d1d2d51f4164fb901a94ee37fa07a874cfbecab741cecedd0cf87d34a79490351a23856bf6731984abd77