Unverified Commit 6176b007 authored by Ahmed Hamid Awan's avatar Ahmed Hamid Awan Committed by GitHub
Browse files

Merge pull request #20107 from natefoo/fix-mulled-search-quay

[24.2] Fix `mulled-search --destination quay`, add index reuse
parents ddc07c90 f66a9419
Loading
Loading
Loading
Loading
+58 −20
Original line number Diff line number Diff line
@@ -3,8 +3,10 @@
import argparse
import json
import logging
import os
import sys
import tempfile
import time
from typing import (
    Dict,
    List,
@@ -29,7 +31,10 @@ try:
        STORED,
        TEXT,
    )
    from whoosh.index import create_in
    from whoosh.index import (
        create_in,
        open_dir,
    )
    from whoosh.qparser import QueryParser
except ImportError:
    Schema = TEXT = STORED = create_in = QueryParser = None
@@ -43,28 +48,60 @@ class QuaySearch:
    Tool to search within a quay organization for a given software name.
    """

    def __init__(self, organization):
    tmp_dir_prefix = "mulled_search.quay."

    def __init__(self, organization, cache_time=900):
        self.index = None
        self.organization = organization
        self.cache_time = cache_time

    def build_index(self):
        """
        Create an index to quickly examine the repositories of a given quay.io organization.
        """
    def paginator(self):
        # download all information about the repositories from the
        # given organization in self.organization

        json_decoder = json.JSONDecoder()
        parameters = {"public": "true", "namespace": self.organization}
        next_page = ""
        while next_page is not None:
            if next_page:
                parameters["next_page"] = next_page
            r = requests.get(
                QUAY_API_URL, headers={"Accept-encoding": "gzip"}, params=parameters, timeout=MULLED_SOCKET_TIMEOUT
            )
        tmp_dir = tempfile.mkdtemp()
            r.raise_for_status()
            decoded_request = json_decoder.decode(r.text)
            next_page = decoded_request.get("next_page")
            yield decoded_request

    def build_index(self):
        """
        Create an index to quickly examine the repositories of a given quay.io organization.
        """
        entries = []
        uid = os.getuid()
        now = time.time()
        for entry in os.scandir(tempfile.gettempdir()):
            if not entry.name.startswith(QuaySearch.tmp_dir_prefix):
                continue
            if not entry.is_dir() or entry.is_symlink():
                continue
            entry_st = entry.stat()
            if entry_st.st_uid != uid:
                continue
            if entry_st.st_mtime < (now - self.cache_time):
                continue
            entries.append((entry_st.st_mtime, entry.path))
        if entries:
            tmp_dir = sorted(entries)[-1][1]
            print(
                f"Using existing quay.io index, remove or decrease --cache-time to rebuild: {tmp_dir}", file=sys.stderr
            )
            self.index = open_dir(tmp_dir)
        else:
            tmp_dir = tempfile.mkdtemp(prefix=QuaySearch.tmp_dir_prefix)
            schema = Schema(title=TEXT(stored=True), content=STORED)
            self.index = create_in(tmp_dir, schema)

        json_decoder = json.JSONDecoder()
        decoded_request = json_decoder.decode(r.text)
            writer = self.index.writer()
            for decoded_request in self.paginator():
                for repository in decoded_request["repositories"]:
                    writer.add_document(title=repository["name"], content=repository["description"])
            writer.commit()
@@ -386,6 +423,7 @@ def main(argv=None):
        help="Autocorrection of typos activated. Lists more results but can be confusing.\
                        For too many queries quay.io blocks the request and the results can be incomplete.",
    )
    parser.add_argument("--cache-time", type=int, default=900, help="Number of seconds to reuse cached results for")
    parser.add_argument("-j", "--json", dest="json", action="store_true", help="Returns results as JSON.")
    parser.add_argument("-s", "--search", required=True, nargs="+", help="The name of the tool(s) to search for.")

@@ -421,7 +459,7 @@ def main(argv=None):

    if "quay" in args.search_dest:
        quay_results = {}
        quay = QuaySearch(args.organization_string)
        quay = QuaySearch(args.organization_string, cache_time=args.cache_time)
        quay.build_index()

        for item in args.search: