SPDX migration and introduction of bundled_licenses.py

The Python script bundled_licenses.py helps to identify the
licenses used in the bundled deps. It simply parses package.json
files in the given directories and returns a best guess for the
License: RPM tag.

The expected usage is like this:
* run bundled_licenses.py on the binary RPMs to see what is
  bundled in the shipped RPMs
* validate the output of bundled_licenses.py
* add licenses identified in the source code of nodejs itself
* validate the resulting License tag suggestion with the license-validate tool
This commit is contained in:
Honza Horak 2024-02-09 12:18:24 +01:00
parent 2961a50035
commit 06bf5fc3d1
2 changed files with 100 additions and 1 deletions

99
bundled_licenses.py Executable file
View File

@ -0,0 +1,99 @@
#!/bin/env python3
# Copyright 2024 Red Hat
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
# The Python script bundled_licenses.py helps to identify the
# licenses used in the bundled deps. It simply parses package.json
# files in the given directories and returns a best guess for the
# License: RPM tag.
#
# The expected usage is like this:
# * run bundled_licenses.py on the binary RPMs to see what is
# bundled in the shipped RPMs
# * validate the output of bundled_licenses.py
# * add licenses identified in the source code of nodejs itself
# * validate the resulting License tag suggestion with the license-validate tool
import argparse
import os
import json
def find_package_json(directories):
    """Collect the paths of all package.json files below the given directories.

    Files located under a ``test/fixtures`` directory are skipped (with a
    warning) because such fixtures are usually not real packages. Every
    decision is reported on standard output.

    Args:
        directories: Iterable of directory paths to search recursively.

    Returns:
        List of paths to the package.json files that were found, in
        traversal order (sub-directories are visited in sorted order).
    """
    file_paths = []
    for directory in directories:
        for root, dirs, files in os.walk(directory):
            # Sorting in place makes os.walk descend in a reproducible order,
            # so the log output is stable between runs.
            dirs.sort()
            if 'package.json' not in files:
                continue
            file_path = os.path.join(root, 'package.json')
            # Compare whole path components instead of a substring, so that
            # e.g. "latest/fixtures" is not mistaken for "test/fixtures".
            parts = os.path.normpath(root).split(os.sep)
            in_fixture = any(
                parent == 'test' and child == 'fixtures'
                for parent, child in zip(parts, parts[1:])
            )
            if in_fixture:
                print(f"Warning: Skipping {file_path} as it seems to be a fixture that is likely not valid.")
                continue
            print(f"Found package.json at: {file_path}")
            file_paths.append(file_path)
    return file_paths
def parse_license_tag(license_tag):
    """Extract the license name from a package.json license entry.

    Args:
        license_tag: Either the license value itself or a mapping that
            carries the name under the ``type`` key.

    Returns:
        ``license_tag['type']`` when ``license_tag`` is a dict (or dict
        subclass) with a ``type`` key; otherwise ``license_tag`` unchanged.
    """
    # isinstance() also accepts dict subclasses such as OrderedDict.
    if isinstance(license_tag, dict) and 'type' in license_tag:
        return license_tag['type']
    return license_tag
def fix_known_spdx_issues(license):
    """Map a known non-SPDX license spelling to its SPDX identifier.

    Args:
        license: License value as found in a package.json file.

    Returns:
        The corrected SPDX identifier when ``license`` equals one of the
        known misspellings, otherwise ``license`` unchanged.
    """
    # (misspelled value, SPDX identifier) pairs, compared with ==.
    corrections = (
        ('Apache 2.0', 'Apache-2.0'),
    )
    for misspelled, spdx_id in corrections:
        if license == misspelled:
            return spdx_id
    return license
def license_from_package_json(file_path):
    """Read the license declared in a package.json file.

    The ``license`` key is preferred; the ``licenses`` key is used as a
    fallback. Either value may be a single entry or a list of entries, and
    each entry is passed through ``parse_license_tag``. Several entries are
    combined with `` AND ``. Problems are reported on standard output.

    Args:
        file_path: Path to the package.json file.

    Returns:
        The license string, or ``None`` when the file cannot be read or
        parsed, is not a JSON object, or declares no usable license.
    """
    try:
        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which
        # json.load() would otherwise reject.
        with open(file_path, 'r', encoding='utf-8-sig') as json_file:
            data = json.load(json_file)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing {file_path}: {e}")
        return None
    except OSError as e:
        print(f"Error reading {file_path}: {e}")
        return None

    if not isinstance(data, dict):
        print(f"Warning: Key license not found in {file_path} but it might not be a valid package.json file at all")
        return None

    if 'license' in data:
        declared = data['license']
    elif 'licenses' in data:
        declared = data['licenses']
    else:
        if 'name' in data and 'version' in data:
            print(f"Error: Key license not found in {file_path} despite it looks like a valid package.json file")
        else:
            print(f"Warning: Key license not found in {file_path} but it might not be a valid package.json file at all")
        return None

    # Treat a single entry like a one-element list, so that a plain string
    # is never iterated character by character.
    entries = declared if isinstance(declared, list) else [declared]
    names = []
    for entry in entries:
        name = parse_license_tag(entry)
        if isinstance(name, str) and name.strip():
            names.append(name)
        else:
            # Anything else (e.g. an object without "type") cannot be joined
            # into a license tag.
            print(f"Error: Unsupported license entry {entry!r} in {file_path}")
    if not names:
        return None
    return ' AND '.join(names)
def _has_top_level_or(expression):
    """Return True when ``expression`` contains an OR outside any parentheses."""
    depth = 0
    for token in expression.replace('(', ' ( ').replace(')', ' ) ').split():
        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
        elif depth <= 0 and token.upper() == 'OR':
            return True
    return False


def build_license_tag(licenses):
    """Combine license expressions into one expression joined with AND.

    Args:
        licenses: Iterable of license expression strings.

    Returns:
        The unique expressions, sorted and joined with `` AND ``. When more
        than one expression is combined, an expression with an OR outside
        parentheses is wrapped in parentheses first. An empty input yields
        an empty string.
    """
    unique = set(licenses)
    if len(unique) <= 1:
        return ''.join(unique)
    # AND binds tighter than OR in SPDX expressions, so "A OR B" must be
    # parenthesized before it is AND-ed with other licenses.
    wrapped = {
        f"({expression})" if _has_top_level_or(expression) else expression
        for expression in unique
    }
    return ' AND '.join(sorted(wrapped))


def main():
    """Parse the command line, scan for package.json files, print a License tag."""
    parser = argparse.ArgumentParser(description=(
        'Find and parse package.json '
        'files in specified directories, and return probable License tag to '
        'be used in an RPM spec file. It is likely better to run it on '
        'unpacked or installed RPMs, rather than unpacked source, because '
        'some of the bundled deps are not likely used in the shipped output. '
        'It is always necessary to manually verify the results by investigating '
        'the files and the resulting License tag may be verified using '
        'license-validate tool.'))
    parser.add_argument(
        'directories', nargs='*', default=[os.getcwd()],
        help='Directories to search for package.json files; if none is given, '
             'use the current directory.')
    args = parser.parse_args()

    licenses = set()
    for path in find_package_json(args.directories):
        detected = license_from_package_json(path)
        if not detected:
            continue
        if not isinstance(detected, str):
            # Only strings can be stored in the set and joined into a tag.
            print(f"Error: Unsupported license value in {path}: {detected!r}")
            continue
        licenses.add(fix_known_spdx_issues(detected))
        print(f"OK: License detected in {path}: {detected}")

    print('Final license tag to be used in the RPM spec file (please, confirm manually '
          'by looking into files and validate using license-validate tool):')
    print('License: ' + build_license_tag(licenses))


if __name__ == "__main__":
    main()

View File

@ -122,7 +122,7 @@ Epoch: %{nodejs_epoch}
Version: %{nodejs_version}
Release: %{nodejs_release}
Summary: JavaScript runtime
License: MIT and ASL 2.0 and ISC and BSD
License: Apache-2.0 AND Artistic-2.0 AND BSD-2-Clause AND BSD-3-Clause AND BlueOak-1.0.0 AND CC-BY-3.0 AND CC0-1.0 AND ISC AND MIT
Group: Development/Languages
URL: http://nodejs.org/