Unverified Commit 246f42ec authored by Wolfgang Walther's avatar Wolfgang Walther Committed by GitHub
Browse files

workflows/labels: label stale issues (reapply) (#421839)

parents 94c323d6 06a88df6
Loading
Loading
Loading
Loading

.github/stale.yml

deleted100644 → 0
+0 −9
Original line number Diff line number Diff line
# Configuration for probot-stale - https://github.com/probot/stale
# Days of inactivity before an issue is considered stale.
daysUntilStale: 180
# false: never auto-close stale issues, only label them (per probot-stale docs).
daysUntilClose: false
# Issues carrying any of these labels are never marked stale.
exemptLabels:
  - "1.severity: security"
  - "2.status: never-stale"
# Label applied when an issue goes stale.
staleLabel: "2.status: stale"
# false: post no comment when marking stale / closing.
markComment: false
closeComment: false
+205 −166
Original line number Diff line number Diff line
@@ -27,10 +27,8 @@ concurrency:

# This is only used as a fallback when the app is not set up.
# This happens when testing in forks without setting up that app.
# Labels will most likely not exist in forks, yet. For this case,
# we add the issues permission only here.
permissions:
  issues: write # needed to create *new* labels
  issues: write
  pull-requests: write

defaults:
@@ -52,8 +50,7 @@ jobs:
        with:
          app-id: ${{ vars.NIXPKGS_CI_APP_ID }}
          private-key: ${{ secrets.NIXPKGS_CI_APP_PRIVATE_KEY }}
          # No issues: write permission here, because labels in Nixpkgs should
          # be created explicitly via the UI with color and description.
          permission-issues: write
          permission-pull-requests: write

      - name: Log current API rate limits
@@ -70,11 +67,12 @@ jobs:
            const Bottleneck = require('bottleneck')
            const path = require('node:path')
            const { DefaultArtifactClient } = require('@actions/artifact')
            const { readFile } = require('node:fs/promises')
            const { readFile, writeFile } = require('node:fs/promises')

            const artifactClient = new DefaultArtifactClient()

            const stats = {
              issues: 0,
              prs: 0,
              requests: 0,
              artifacts: 0
@@ -123,19 +121,10 @@ jobs:
            // Update remaining requests every minute to account for other jobs running in parallel.
            const reservoirUpdater = setInterval(updateReservoir, 60 * 1000)

            async function handle(item) {
              try {
                const log = (k,v,skip) => {
                  core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : ''))
                  return skip
                }

                log('Last updated at', item.updated_at)
                stats.prs++
                log('URL', item.html_url)
            async function handlePullRequest(item) {
              const log = (k,v) => core.info(`PR #${item.number} - ${k}: ${v}`)

              const pull_number = item.number
                const issue_number = item.number

              // This API request is important for the merge-conflict label, because it triggers the
              // creation of a new test merge commit. This is needed to actually determine the state of a PR.
@@ -144,6 +133,42 @@ jobs:
                pull_number
              })).data

              const approvals = new Set(
                (await github.paginate(github.rest.pulls.listReviews, {
                  ...context.repo,
                  pull_number
                }))
                .filter(review => review.state == 'APPROVED')
                .map(review => review.user?.id)
              )

              // After creation of a Pull Request, `merge_commit_sha` will be null initially:
              // The very first merge commit will only be calculated after a little while.
              // To avoid labeling the PR as conflicted before that, we wait a few minutes.
              // This is intentionally less than the time that Eval takes, so that the label job
              // running after Eval can indeed label the PR as conflicted if that is the case.
              const merge_commit_sha_valid = new Date() - new Date(pull_request.created_at) > 3 * 60 * 1000

              const prLabels = {
                // We intentionally don't use the mergeable or mergeable_state attributes.
                // Those have an intermediate state while the test merge commit is created.
                // This doesn't work well for us, because we might have just triggered another
                // test merge commit creation by requesting the pull request via API at the start
                // of this function.
                // The attribute merge_commit_sha keeps the old value of null or the hash *until*
                // the new test merge commit has either successfully been created or failed to do so.
                // This essentially means we are updating the merge conflict label in two steps:
                // On the first pass of the day, we just fetch the pull request, which triggers
                // the creation. At this stage, the label is likely not updated, yet.
                // The second pass will then read the result from the first pass and set the label.
                '2.status: merge conflict': merge_commit_sha_valid && !pull_request.merge_commit_sha,
                '12.approvals: 1': approvals.size == 1,
                '12.approvals: 2': approvals.size == 2,
                '12.approvals: 3+': approvals.size >= 3,
                '12.first-time contribution':
                  [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association),
              }

              const run_id = (await github.rest.actions.listWorkflowRuns({
                ...context.repo,
                workflow_id: 'pr.yml',
@@ -191,26 +216,50 @@ jobs:
                  path: path.resolve(pull_number.toString()),
                  expectedHash: artifact.digest
                })
                }

                // Create a map (Label -> Boolean) of all currently set labels.
                // Each label is set to True and can be disabled later.
                const before = Object.fromEntries(
                  (await github.paginate(github.rest.issues.listLabelsOnIssue, {
                    ...context.repo,
                    issue_number
                  }))
                  .map(({ name }) => [name, true])
                )
                const maintainers = new Set(Object.keys(
                  JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8'))
                ).map(m => Number.parseInt(m, 10)))

                const approvals = new Set(
                  (await github.paginate(github.rest.pulls.listReviews, {
                    ...context.repo,
                    pull_number
                  }))
                  .filter(review => review.state == 'APPROVED')
                  .map(review => review.user?.id)
                const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels

                Object.assign(
                  prLabels,
                  // Ignore `evalLabels` if it's an array.
                  // This can happen for older eval runs, before we switched to objects.
                  // The old eval labels would have been set by the eval run,
                  // so now they'll be present in `before`.
                  // TODO: Simplify once old eval results have expired (~2025-10)
                  (Array.isArray(evalLabels) ? undefined : evalLabels),
                  {
                    '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)),
                  }
                )
              }

              return prLabels
            }

            async function handle(item) {
              try {
                const log = (k,v,skip) => {
                  core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : ''))
                  return skip
                }

                log('Last updated at', item.updated_at)
                log('URL', item.html_url)

                const issue_number = item.number

                const itemLabels = {}

                if (item.pull_request) {
                  stats.prs++
                  Object.assign(itemLabels, await handlePullRequest(item))
                } else {
                  stats.issues++
                }

                const latest_event_at = new Date(
                  (await github.paginate(
@@ -250,60 +299,21 @@ jobs:

                const stale_at = new Date(new Date().setDate(new Date().getDate() - 180))

                // After creation of a Pull Request, `merge_commit_sha` will be null initially:
                // The very first merge commit will only be calculated after a little while.
                // To avoid labeling the PR as conflicted before that, we wait a few minutes.
                // This is intentionally less than the time that Eval takes, so that the label job
                // running after Eval can indeed label the PR as conflicted if that is the case.
                const merge_commit_sha_valid = new Date() - new Date(pull_request.created_at) > 3 * 60 * 1000

                // Manage most of the labels, without eval results
                const after = Object.assign(
                  {},
                  before,
                  {
                    // We intentionally don't use the mergeable or mergeable_state attributes.
                    // Those have an intermediate state while the test merge commit is created.
                    // This doesn't work well for us, because we might have just triggered another
                    // test merge commit creation by requesting the pull request via API at the start
                    // of this function.
                    // The attribute merge_commit_sha keeps the old value of null or the hash *until*
                    // the new test merge commit has either successfully been created or failed to do so.
                    // This essentially means we are updating the merge conflict label in two steps:
                    // On the first pass of the day, we just fetch the pull request, which triggers
                    // the creation. At this stage, the label is likely not updated, yet.
                    // The second pass will then read the result from the first pass and set the label.
                    '2.status: merge conflict': merge_commit_sha_valid && !pull_request.merge_commit_sha,
                    '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at,
                    '12.approvals: 1': approvals.size == 1,
                    '12.approvals: 2': approvals.size == 2,
                    '12.approvals: 3+': approvals.size >= 3,
                    '12.first-time contribution':
                      [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association),
                  }
                // Create a map (Label -> Boolean) of all currently set labels.
                // Each label is set to True and can be disabled later.
                const before = Object.fromEntries(
                  (await github.paginate(github.rest.issues.listLabelsOnIssue, {
                    ...context.repo,
                    issue_number
                  }))
                  .map(({ name }) => [name, true])
                )

                // Manage labels based on eval results
                if (!expired) {
                  const maintainers = new Set(Object.keys(
                    JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8'))
                  ).map(m => Number.parseInt(m, 10)))

                  const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels
                Object.assign(itemLabels, {
                  '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at,
                })

                  Object.assign(
                    after,
                    // Ignore `evalLabels` if it's an array.
                    // This can happen for older eval runs, before we switched to objects.
                    // The old eval labels would have been set by the eval run,
                    // so now they'll be present in `before`.
                    // TODO: Simplify once old eval results have expired (~2025-10)
                    (Array.isArray(evalLabels) ? undefined : evalLabels),
                    {
                      '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)),
                    }
                  )
                }
                const after = Object.assign({}, before, itemLabels)

                // No need for an API request, if all labels are the same.
                const hasChanges = Object.keys(after).some(name => (before[name] ?? false) != after[name])
@@ -329,19 +339,19 @@ jobs:
              if (context.payload.pull_request) {
                await handle(context.payload.pull_request)
              } else {
                const workflowData = (await github.rest.actions.listWorkflowRuns({
                const lastRun = (await github.rest.actions.listWorkflowRuns({
                  ...context.repo,
                  workflow_id: 'labels.yml',
                  event: 'schedule',
                  status: 'success',
                  exclude_pull_requests: true,
                  per_page: 1
                })).data
                })).data.workflow_runs[0]

                // Go back as far as the last successful run of this workflow to make sure
                // we are not leaving anyone behind on GHA failures.
                // Defaults to go back 1 hour on the first run.
                const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000)
                const cutoff = new Date(lastRun?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000)
                core.info('cutoff timestamp: ' + cutoff.toISOString())

                const updatedItems = await github.paginate(
@@ -349,7 +359,6 @@ jobs:
                  {
                    q: [
                      `repo:"${process.env.GITHUB_REPOSITORY}"`,
                      'type:pr',
                      'is:open',
                      `updated:>=${cutoff.toISOString()}`
                    ].join(' AND '),
@@ -358,50 +367,80 @@ jobs:
                  }
                )

                // The search endpoint only allows fetching the first 1000 records, but the
                // pull request list endpoint does not support counting the total number
                // of results.
                // Thus, we use /search for counting and /pulls for reading the response.
                const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({
                  q: [
                    `repo:"${process.env.GITHUB_REPOSITORY}"`,
                    'type:pr',
                    'is:open'
                  ].join(' AND '),
                  sort: 'created',
                  direction: 'asc',
                  // TODO: Remove in 2025-10, when it becomes the default.
                  advanced_search: true,
                  per_page: 1
                })).data
                const { total_count: total_runs } = workflowData
                let cursor

                const allPulls = (await github.rest.pulls.list({
                // No workflow run available the first time.
                if (lastRun) {
                  // The cursor to iterate through the full list of issues and pull requests
                  // is passed between jobs as an artifact.
                  const artifact = (await github.rest.actions.listWorkflowRunArtifacts({
                    ...context.repo,
                    run_id: lastRun.id,
                    name: 'pagination-cursor'
                  })).data.artifacts[0]

                  // If the artifact is not available, the next iteration starts at the beginning.
                  if (artifact) {
                    stats.artifacts++

                    const { downloadPath } = await artifactClient.downloadArtifact(artifact.id, {
                      findBy: {
                        repositoryName: context.repo.repo,
                        repositoryOwner: context.repo.owner,
                        token: core.getInput('github-token')
                      },
                      expectedHash: artifact.digest
                    })

                    cursor = await readFile(path.resolve(downloadPath, 'cursor'), 'utf-8')
                  }
                }

                // From GitHub's API docs:
                //   GitHub's REST API considers every pull request an issue, but not every issue is a pull request.
                //   For this reason, "Issues" endpoints may return both issues and pull requests in the response.
                //   You can identify pull requests by the pull_request key.
                const allItems = await github.rest.issues.listForRepo({
                  ...context.repo,
                  state: 'open',
                  sort: 'created',
                  direction: 'asc',
                  per_page: 100,
                  // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and
                  // up to 6*24=144 scheduled runs per day, we hit every PR twice each day.
                  // We might not hit every PR on one iteration, because the pages will shift slightly when
                  // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was
                  // missed once, would have to move through the whole page to be missed again. This is very unlikely,
                  // so it should certainly be hit on the next iteration.
                  // TODO: Evaluate after a while, whether the above holds still true and potentially implement
                  // an overlap between runs.
                  page: (total_runs % Math.ceil(total_pulls / 100)) + 1
                })).data
                  after: cursor
                })

                // Regex taken and comment adjusted from:
                // https://github.com/octokit/plugin-paginate-rest.js/blob/8e5da25f975d2f31dda6b8b588d71f2c768a8df2/src/iterator.ts#L36-L41
                // `allItems.headers.link` format:
                //   <https://api.github.com/repositories/4542716/issues?page=3&per_page=100&after=Y3Vyc29yOnYyOpLPAAABl8qNnYDOvnSJxA%3D%3D>; rel="next",
                //   <https://api.github.com/repositories/4542716/issues?page=1&per_page=100&before=Y3Vyc29yOnYyOpLPAAABl8xFV9DOvoouJg%3D%3D>; rel="prev"
                // Sets `next` to undefined if "next" URL is not present or `link` header is not set.
                const next = ((allItems.headers.link ?? '').match(/<([^<>]+)>;\s*rel="next"/) ?? [])[1]
                if (next) {
                  cursor = new URL(next).searchParams.get('after')
                  const uploadPath = path.resolve('cursor')
                  await writeFile(uploadPath, cursor, 'utf-8')
                  // No stats.artifacts++, because this does not allow passing a custom token.
                  // Thus, the upload will not happen with the app token, but the default github.token.
                  await artifactClient.uploadArtifact(
                    'pagination-cursor',
                    [uploadPath],
                    path.resolve('.'),
                    {
                      retentionDays: 1
                    }
                  )
                }

                // Some items might be in both search results, so filtering out duplicates as well.
                const items = [].concat(updatedItems, allPulls)
                const items = [].concat(updatedItems, allItems.data)
                  .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number))

                ;(await Promise.allSettled(items.map(handle)))
                  .filter(({ status }) => status == 'rejected')
                  .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`))

                core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`)
                core.notice(`Processed ${stats.prs} PRs, ${stats.issues} Issues, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`)
              }
            } finally {
              clearInterval(reservoirUpdater)