#!/bin/python3

# This script looks for the 'latest' comps-fXX.xml.in file, assumes it's
# for Rawhide, and looks through its `packagereq` entries for ones that
# specify packages that do not currently exist in Rawhide. It is
# arch-aware. It expects to be run on a Fedora system with network
# access, as it will try to query the 'rawhide' dnf repo to get lists of
# currently-existing packages. With --update, the modified comps file is
# written back in place; otherwise the findings are only printed.

import glob
import argparse
import subprocess
import lxml.etree as ET
from collections import defaultdict

# Architectures to check; the rawhide repo is queried once per entry.
ARCHES = (
    'aarch64',
    'armv7hl',
    'ppc64le',
    's390x',
    'x86_64',
)

# Command-line interface. The only option is --update: when given, the
# script writes its changes back to the comps file in addition to
# printing the report.
parser = argparse.ArgumentParser(
    description='Check the comps file for missing packages and packages missing on architectures',
)
parser.add_argument(
    '--update',
    action='store_true',
    help='Update the comps file with the changes',
)
args = parser.parse_args()

# Gather the names of all packages in the rawhide repo, one collection per
# architecture. This eats lots of RAM. I don't care.
#
# The names are stored in sets: every packagereq is looked up against
# every architecture, and membership tests on plain lists would make that
# a linear scan each time.
pkgs = {}
for arch in ARCHES:
    query = subprocess.run(
        ('dnf', '--forcearch={}'.format(arch), '--disablerepo=*',
         '--enablerepo=rawhide', 'repoquery', '--qf=%{NAME}'),
        capture_output=True, text=True)
    # A failed query produces no package names, which would make every
    # package look missing on this arch. Stop instead of reporting (or,
    # with --update, writing) results based on bad data.
    if query.returncode != 0:
        raise SystemExit(
            'dnf repoquery for {} failed with exit status {}:\n{}'.format(
                arch, query.returncode, query.stderr))
    pkgs[arch] = set(query.stdout.splitlines())

# find the *latest* comps file (assume it's rawhide)
def _release_number(filename):
    """Return the integer formed by all digits in *filename*, or -1 if none.

    Files matching the glob but carrying no digits at all would otherwise
    crash the int() conversion; ranking them lowest keeps them from ever
    being picked over a numbered release file.
    """
    digits = ''.join(c for c in filename if c.isdigit())
    return int(digits) if digits else -1


compsfiles = glob.glob('comps-f*.xml.in')
if not compsfiles:
    # Fail with an explanation rather than an IndexError from an empty list.
    raise SystemExit('No comps-f*.xml.in file found in the current directory')
latest = max(compsfiles, key=_release_number)


# Parse the chosen comps file and collect every <packagereq> element in it.
# findall() returns a list, so the loop below can remove elements from the
# tree without disturbing its own iteration.
tree = ET.parse(latest)
root = tree.getroot()
pkgreqs = root.findall('.//packagereq')

# Check if each package is in the repository for each architecture.
#
# removedpkgs: package name -> ids of the groups it was removed from
# archpkgs:    package name -> comma-separated arches that ship the package
removedpkgs = defaultdict(list)
archpkgs = {}
for pkgreq in pkgreqs:
    pkgname = pkgreq.text
    # packagereq sits inside a packagelist inside the group
    grpid = pkgreq.find('./../../id').text

    # arches the package is listed for (if no 'arch' key, it's listed for all)
    archattr = pkgreq.get('arch', '')
    if archattr:
        reqarches = []
        for token in archattr.split(','):
            reqarch = token.strip()
            # comps uses 'armhfp' for what the repo calls 'armv7hl'
            if reqarch == 'armhfp':
                reqarch = 'armv7hl'
            # 'ppc64' is dropped wherever it appears in the list, not only
            # when it is followed by a comma
            if reqarch and reqarch != 'ppc64':
                reqarches.append(reqarch)
    else:
        reqarches = ARCHES

    # requested arches we have data for on which the package is absent
    missing = [a for a in reqarches if a in pkgs and pkgname not in pkgs[a]]
    # every known arch that actually ships the package, regardless of tags
    available = [a for a in ARCHES if a in pkgs and pkgname in pkgs[a]]

    if not available:
        # The package exists nowhere, so the entry is dead whether or not
        # it carries an 'arch' restriction. Comparing 'missing' against the
        # full ARCHES list would never match an arch-restricted entry.
        if pkgreq.getparent() is not None:
            removedpkgs[pkgname].append(grpid)
            pkgreq.getparent().remove(pkgreq)
    elif missing:
        # Listed for at least one arch that lacks it; report where it
        # really is available so the tag can be corrected.
        archpkgs[pkgname] = ', '.join(available)

# Drop every group whose packagelist has no children left once the
# non-existent packages have been removed. removedgrps maps each dropped
# group id to a list that the later sections fill with the ids of the
# environments and categories that referenced it.
removedgrps = {}
for packagelist in root.findall('.//packagelist'):
    if len(packagelist) > 0:
        continue
    emptygroup = packagelist.getparent()
    emptyid = emptygroup.find('./id').text
    removedgrps[emptyid] = []
    emptygroup.getparent().remove(emptygroup)


# Remove references to the dropped groups from the environment lists and
# then from the category lists, recording the id of each environment or
# category that referenced them.
for refpath in ('.//environment//groupid', './/category//groupid'):
    for groupref in root.findall(refpath):
        refid = groupref.text
        if refid not in removedgrps:
            continue
        # The groupid sits inside a list element inside the environment
        # or category, so the owner's id is two levels up.
        holder = groupref.getparent()
        ownerid = holder.getparent().find('./id').text
        removedgrps[refid].append(ownerid)
        holder.remove(groupref)


# Remove any language packs for packages that don't exist anymore.
# removedlang collects the names of the packages whose langpack entry
# was dropped, in document order.
removedlang = []
langpacks = root.find('.//langpacks')
# A comps file without a <langpacks> section simply has nothing to clean.
if langpacks is not None:
    # Iterate over a copy of the children because entries are removed
    # from the parent inside the loop.
    for lang in list(langpacks):
        pkg = lang.get('name')
        if pkg in removedpkgs:
            removedlang.append(pkg)
            langpacks.remove(lang)

# Report the findings: one heading per category, one indented line per entry.
print('Packages with incorrect architecture tags:')
for name, arches in sorted(archpkgs.items()):
    print('    {} only available on {}'.format(name, arches))

print('\nRemoving packages:')
for name, groupids in sorted(removedpkgs.items()):
    print('    {} in group {}'.format(name, ', '.join(groupids)))

print('\nRemoving empty groups:')
for groupid, owners in sorted(removedgrps.items()):
    print('    {} in {}'.format(groupid, ', '.join(owners)))

# Language packs are listed in the order they were removed, not sorted.
print('\nRemoving language packs for:')
for name in removedlang:
    print('    {}'.format(name))


# Write out the updated XML file if desired. This overwrites the same
# comps file that was parsed; without --update the changes made to the
# in-memory tree are only reported, never saved.
if args.update:
    tree.write(latest, encoding="UTF-8", xml_declaration=True)