#!/bin/python3

# This script looks for the 'latest' comps-fXX.xml.in file, assumes it's
# for Rawhide, and looks through its `packagereq` lines for ones that specify
# packages that do not currently exist in Rawhide. It is arch-aware. It
# expects to be run on a Fedora system with network access, as it will
# try to query the 'rawhide' dnf repo to get lists of currently-existing
# packages.
#
# Nothing is written back unless --update is given; without it the script
# only prints a summary of what it would change.

import argparse
import glob
import re
import subprocess
from collections import defaultdict

import lxml.etree as ET

# Arches whose package lists are queried and checked.
ARCHES = ('aarch64', 'armv7hl', 'ppc64le', 's390x', 'x86_64')
# Names found in 'arch' attributes that are translated before checking.
ARCH_ALIASES = {'armhfp': 'armv7hl'}
# Names found in 'arch' attributes that are dropped before checking.
IGNORED_ARCHES = ('ppc64',)


def query_packages(arch):
    """Return the set of package names dnf reports in 'rawhide' for arch.

    Exits the script if dnf fails or reports no packages at all: carrying
    on with an empty list would make every package look missing on this
    arch, and --update would then strip them from the comps file.
    """
    # NOTE(review): this relies on dnf printing one name per line for
    # --qf=%{NAME}. dnf5 reportedly no longer appends a newline after each
    # record -- confirm the output format on the target system.
    result = subprocess.run(
        ('dnf', f'--forcearch={arch}', '--disablerepo=*',
         '--enablerepo=rawhide', 'repoquery', '--qf=%{NAME}'),
        capture_output=True, text=True)
    if result.returncode != 0:
        raise SystemExit('dnf repoquery failed for {} (exit status {}):\n{}'.format(
            arch, result.returncode, result.stderr.strip()))
    # A set makes the per-package membership tests below O(1).
    names = set(result.stdout.splitlines())
    names.discard('')
    if not names:
        raise SystemExit(
            'dnf repoquery returned no packages for {}; refusing to continue'.format(arch))
    return names


def find_latest_comps():
    """Return the comps-fXX.xml.in file name with the highest number XX.

    Files matching the glob but lacking a number after 'comps-f' are
    skipped. Exits the script if no numbered file is found.
    """
    numbered = []
    for path in glob.glob('comps-f*.xml.in'):
        match = re.match(r'comps-f(\d+)', path)
        if match:
            numbered.append((int(match.group(1)), path))
    if not numbered:
        raise SystemExit('No comps-fXX.xml.in file found in the current directory')
    return max(numbered)[1]


def requested_arches(pkgreq):
    """Return the list of arches a packagereq element is listed for.

    With no 'arch' attribute the package is listed for all of ARCHES.
    Otherwise the comma-separated attribute is split, aliases are
    translated, and ignored arches and empty entries are dropped.
    """
    attr = pkgreq.get('arch', '')
    if not attr.strip():
        return list(ARCHES)
    arches = []
    for name in attr.split(','):
        name = name.strip()
        name = ARCH_ALIASES.get(name, name)
        if name and name not in IGNORED_ARCHES:
            arches.append(name)
    return arches


parser = argparse.ArgumentParser(description='Check the comps file for missing packages and packages missing on architectures')
parser.add_argument('--update', dest='update', action='store_true', default=False,
                    help='Update the comps file with the changes')
args = parser.parse_args()

# find the *latest* comps file (assume it's rawhide); done before the slow
# dnf queries so a missing file is reported immediately
latest = find_latest_comps()

# gather package lists. this eats lots of RAM. I don't care.
pkgs = {arch: query_packages(arch) for arch in ARCHES}

# find package reqs in comps
tree = ET.parse(latest)
root = tree.getroot()

# Check if each package is in the repository for each architecture
removedpkgs = defaultdict(list)   # package name -> ids of groups it was removed from
archpkgs = {}                     # package name -> arches it is still available on
for pkgreq in root.findall('.//packagereq'):
    pkgname = pkgreq.text
    missing = []
    present = []
    for arch in requested_arches(pkgreq):
        # An arch we hold no package list for is counted as present, so an
        # unknown arch name never causes a removal.
        if arch in pkgs and pkgname not in pkgs[arch]:
            missing.append(arch)
        else:
            present.append(arch)

    if not missing:
        continue
    if present:
        # Available on some of the requested arches only: report, keep it.
        archpkgs[pkgname] = ', '.join(present)
        continue

    # Missing on every arch it is requested for: drop the request.
    parent = pkgreq.getparent()
    if parent is not None:
        # packagereq sits in a packagelist inside the group, hence '../..'
        removedpkgs[pkgname].append(pkgreq.find('./../../id').text)
        parent.remove(pkgreq)

# Find empty groups after packages not in repositories have been removed
removedgrps = {}   # group id -> ids of environments/categories that referenced it
for pkglist in root.findall('.//packagelist'):
    if len(pkglist):
        continue
    group = pkglist.getparent()
    removedgrps[group.find('./id').text] = []
    group.getparent().remove(group)

# Remove any empty groups from the environment lists, then the category lists
for container in ('environment', 'category'):
    for groupref in root.findall('.//{}//groupid'.format(container)):
        grpid = groupref.text
        if grpid not in removedgrps:
            continue
        # The groupid is inside a list element inside the environment/category
        grouplist = groupref.getparent()
        removedgrps[grpid].append(grouplist.getparent().find('./id').text)
        grouplist.remove(groupref)

# Remove any language packs for packages that don't exist anymore
removedlang = []
langpacks = root.find('.//langpacks')
if langpacks is not None:
    # Iterate over a copy because entries are removed while looping.
    for entry in list(langpacks):
        pkg = entry.get('name')
        if pkg in removedpkgs:
            removedlang.append(pkg)
            langpacks.remove(entry)

# Print out a summary
print('Packages with incorrect architecture tags:')
for pkg in sorted(archpkgs):
    print(' {} only available on {}'.format(pkg, archpkgs[pkg]))

print('\nRemoving packages:')
for pkg in sorted(removedpkgs):
    print(' {} in group {}'.format(pkg, ', '.join(removedpkgs[pkg])))

print('\nRemoving empty groups:')
for group in sorted(removedgrps):
    print(' {} in {}'.format(group, ', '.join(removedgrps[group])))

print('\nRemoving language packs for:')
for lang in removedlang:
    print(' {}'.format(lang))

# Write out the updated XML file if desired
if args.update:
    tree.write(latest, encoding="UTF-8", xml_declaration=True)