Unverified Commit 2cbbf1ea authored by Nicola Soranzo's avatar Nicola Soranzo
Browse files

Check GitHub API rate limit in GitHubSearch. Deprecate `get_json()` search

parent 58d7e3cd
Loading
Loading
Loading
Loading
+31 −7
Original line number Diff line number Diff line
@@ -5,6 +5,10 @@ import json
import logging
import sys
import tempfile
from datetime import (
    datetime,
    timezone,
)

import requests

@@ -140,12 +144,29 @@ class GitHubSearch():
    Tool to search the GitHub bioconda-recipes repo
    """

    @staticmethod
    def _check_response_rate_limit(response):
        if response.status_code == 403 and "API rate limit exceeded" in response.json()["message"]:
            # It can take tens of minutes before the rate limit window resets
            message = "GitHub API rate limit exceeded."
            rate_limit_reset_UTC_timestamp = response.headers.get("X-RateLimit-Reset")
            if rate_limit_reset_UTC_timestamp:
                rate_limit_reset_datetime = datetime.fromtimestamp(int(rate_limit_reset_UTC_timestamp), tz=timezone.utc)
                message += f" The rate limit window will reset at {rate_limit_reset_datetime.isoformat()}."
            raise Exception(message)

    def get_json(self, search_string):
        """
        Takes search_string variable and return results from the bioconda-recipes github repository in JSON format

        DEPRECATED: this method is currently unreliable because the API query
        sometimes succeeds but returns no items.
        """
        response = requests.get(
            f"https://api.github.com/search/code?q={search_string}+in:path+repo:bioconda/bioconda-recipes+path:recipes", timeout=MULLED_SOCKET_TIMEOUT)
            f"https://api.github.com/search/code?q={search_string}+in:path+repo:bioconda/bioconda-recipes+path:recipes",
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        self._check_response_rate_limit(response)
        response.raise_for_status()
        return response.json()

@@ -165,10 +186,12 @@ class GitHubSearch():
        """
        Check if a recipe exists in bioconda-recipes which matches search_string exactly
        """
        if requests.get("https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/%s" % search_string, timeout=MULLED_SOCKET_TIMEOUT).status_code == 200:
            return True
        else:
            return False
        response = requests.get(
            f"https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/{search_string}",
            timeout=MULLED_SOCKET_TIMEOUT,
        )
        self._check_response_rate_limit(response)
        return response.status_code == 200


def get_package_hash(packages, versions):
@@ -339,10 +362,11 @@ def main(argv=None):
        github = GitHubSearch()

        for item in args.search:
            github_json = github.get_json(item)
            github_results[item] = github.process_json(github_json, item)
            if github.recipe_present(item):
                github_recipe_present.append(item)
            else:
                github_json = github.get_json(item)
                github_results[item] = github.process_json(github_json, item)

        json_results['github'] = github_results
        json_results['github_recipe_present'] = {
+14 −5
Original line number Diff line number Diff line
@@ -27,12 +27,21 @@ def test_conda_search():


@external_dependency_management
def test_github_search():
def test_github_recipe_present():
    t = GitHubSearch()
    search1 = t.process_json(t.get_json("adsfasdf"), "adsfasdf")
    search2 = t.process_json(t.get_json("bioconductor-gosemsim"), "bioconductor-gosemsim")
    assert search1 == []
    assert {'path': 'recipes/bioconductor-gosemsim/meta.yaml', 'name': 'meta.yaml'} in search2

    search_string2expected = {
        "adsfasdf": False,
        "bioconductor-gosemsim": True,
    }
    for search_string, expected in search_string2expected.items():
        try:
            is_recipe_present = t.recipe_present(search_string)
        except Exception as e:
            if "API rate limit" in str(e):
                pytest.skip("Hitting GitHub API rate limit")
            raise
        assert is_recipe_present is expected


@external_dependency_management