Commit b77640c2 authored by Matt Pryor
Browse files

Update publisher to use new config structure

parent 7ea0a8cb
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -88,12 +88,19 @@ services:
    image: "${ESGF_HUB}/${ESGF_PREFIX}publisher:${ESGF_VERSION}"
    build: ./publisher
    environment:
      ESGF_HOSTNAME:
      ESGF_DATABASE_HOST: esgf-postgres-esgcet
      ESGF_DATABASE_PORT: "5432"
      ESGF_DATABASE_USER: esgcet
      ESGF_DATABASE_PASSWORD_FILE: /esg/secrets/esgcet-database-password
      ESGF_DATABASE_NAME: esgcet
      ESGF_HESSIAN_METADATA_URL: "https://${ESGF_HOSTNAME}/esgcet/remote/hessian/guest/remoteMetadataService"
      ESGF_HESSIAN_URL: "https://${ESGF_HOSTNAME}/esg-search/remote/secure/client-cert/hessian/publishingService"
      ESGF_TDS_CATALOG_URL: "https://${ESGF_HOSTNAME}/thredds/catalog/esgcet"
      ESGF_TDS_REINIT_URL: "https://${ESGF_HOSTNAME}/thredds/admin/debug?Catalogs/recheck"
      ESGF_TDS_REINIT_ERROR_URL: "https://${ESGF_HOSTNAME}/thredds/admin/content/logs/catalogInit.log"
      ESGF_TDS_USERNAME: rootAdmin
      ESGF_TDS_PASSWORD_FILE: /esg/secrets/rootadmin-password
      ESGF_SLCS_CERTIFICATE_URL: "https://${ESGF_HOSTNAME}/esgf-slcs/onlineca/certificate/"
    volumes:
      - "$ESGF_DATA:/esg/data"
      - "tds-content:/esg/content/thredds/esgcet"
@@ -102,6 +109,8 @@ services:
      - "$ESGF_CONFIG/secrets/rootadmin-password:/esg/secrets/rootadmin-password:ro"
      # Make sure the trusted certificate bundle is available
      - "$ESGF_CONFIG/certificates/esg-trust-bundle.pem:/esg/certificates/esg-trust-bundle.pem:ro"
      # Allow the publisher configuration to be overridden
      - "$ESGF_CONFIG/publisher:/esg/config/esgcet/.overrides:ro"
    # Use a command that exits immediately with success, but don't change the entrypoint
    command: ["true"]
    depends_on:
+13 −9
Original line number Diff line number Diff line
@@ -2,6 +2,15 @@
# Docker image for the ESGF publisher client
#####

ARG ESGF_HUB=esgfhub
ARG ESGF_PREFIX=
ARG ESGF_VERSION=latest

# This build stage is required because COPY --from=$ARG is not supported
# https://github.com/moby/moby/issues/34482
FROM ${ESGF_HUB}/${ESGF_PREFIX}configure:${ESGF_VERSION} as configuration


FROM continuumio/miniconda:4.3.27

# Create an unprivileged user to use
@@ -17,11 +26,6 @@ RUN groupadd -g $PUBLISH_GID $PUBLISH_GROUP && \
ENV ESGINI /esg/config/esgcet/esg.ini
ENV UVCDAT_ANONYMOUS_LOG no

# Install gettext-base for envsubst
RUN apt-get update && \
    apt-get install -y gettext-base && \
    rm -rf /var/lib/apt/lists/*

# Install packages using conda and pip
RUN conda install -y -c conda-forge -c uvcdat \
      gcc_linux-64 \
@@ -50,12 +54,12 @@ RUN git clone https://github.com/ESGF/esg-publisher.git /application && \
# Install the scripts (including the entrypoint) into /usr/local/bin
COPY scripts/* /usr/local/bin/
# Install configuration files and templates
# Start with the /esg/bin directory from esgf-configure
COPY --chown=1001:0 --from=configuration /esg/bin /esg/bin
# Make the configuration files owned by the publish user and root group
# This means the container can be run by any user in the root group (e.g. on OpenShift)
RUN mkdir -p /esg/config/esgcet && \
    chown 1001:0 /esg/config/esgcet && \
    chmod 775 /esg/config/esgcet
COPY --chown=1001:0 conf /esg/config/esgcet
COPY --chown=1001:0 ./conf /esg/config/esgcet/.defaults
RUN chown 1001:0 /esg/config/esgcet && chmod 775 /esg/config/esgcet
# Create the thredds content root owned by publish user and root group
RUN mkdir -p /esg/content/thredds/esgcet && \
    chown -R 1001:0 /esg/content/thredds && \
+8 −8
Original line number Diff line number Diff line
[DEFAULT]
checksum = sha256sum | SHA256
dburl = postgresql://${ESGF_DATABASE_USER}:${ESGF_DATABASE_PASSWORD}@${ESGF_DATABASE_HOST}:${ESGF_DATABASE_PORT}/${ESGF_DATABASE_NAME}
dburl = {{ .Env.DATABASE_URL }}
gateway_options = ESG-PCMDI, ESG-NCAR, ESG-ORNL, ESG-BADC, ESG-NCI, ESG-DKRZ
hessian_service_debug = false
hessian_service_keyfile = /esg/config/esgcet/.globus/certificate-file
@@ -8,8 +8,8 @@ hessian_service_certfile = /esg/config/esgcet/.globus/certificate-file
hessian_service_polling_delay = 3
hessian_service_polling_iterations = 10
hessian_service_port = 443
hessian_service_remote_metadata_url = $ESGF_HESSIAN_METADATA_URL
hessian_service_url = $ESGF_HESSIAN_URL
hessian_service_remote_metadata_url = {{ .Env.ESGF_HESSIAN_METADATA_URL }}
hessian_service_url = {{ .Env.ESGF_HESSIAN_URL }}
log_format = %(levelname)-10s %(asctime)s %(message)s
log_level = WARNING
offline_lister =
@@ -26,20 +26,20 @@ thredds_file_services =
        HTTPServer | /thredds/fileServer/       | HTTPServer    | fileservice
        OpenDAP    | /thredds/dodsC/            | OpenDAPServer | fileservice
#        GridFTP    | gsiftp://my.esgf.node:2811/ | GRIDFTP       | fileservice
thredds_url = $ESGF_TDS_CATALOG_URL
thredds_username = $ESGF_TDS_USERNAME
thredds_password = $ESGF_TDS_PASSWORD
thredds_url = {{ .Env.ESGF_TDS_CATALOG_URL }}
thredds_username = {{ .Env.ESGF_TDS_USERNAME }}
thredds_password = {{ getenv "ESGF_TDS_PASSWORD" }}
thredds_authentication_realm = THREDDS Data Server
thredds_catalog_basename = %(dataset_id)s.v%(version)s.xml
thredds_master_catalog_name = Earth System Grid catalog
thredds_max_catalogs_per_directory = 500
thredds_offline_services =
#        SRM | srm://esgf-data-node:6288/srm/v2/server?SFN=/archive.sample.gov | HRMatPCMDI
thredds_reinit_error_url = $ESGF_TDS_REINIT_ERROR_URL
thredds_reinit_error_url = {{ .Env.ESGF_TDS_REINIT_ERROR_URL }}
thredds_error_pattern = Catalog init
thredds_reinit_success_pattern = reinit ok
thredds_fatal_error_pattern = **Fatal
thredds_reinit_url = $ESGF_TDS_REINIT_URL
thredds_reinit_url = {{ .Env.ESGF_TDS_REINIT_URL }}
thredds_restrict_access = esg-user
thredds_root = /esg/content/thredds/esgcet
thredds_root_catalog_name = Earth System Root catalog
+10 −80
Original line number Diff line number Diff line
#!/bin/bash

set -eo pipefail
set -euo pipefail

function info { echo "[INFO] $1"; }
function error { echo "[ERROR] $1" 1>&2; exit 1; }
. /esg/bin/functions.sh

#####
## This script sets up the publisher container before executing the given command
##
## This includes interpolating configuration files in /esg/config/esgcet with
## values from the environment, running "esginitialize -c" and fetching a
## certificate from the SLCS for use when publishing.
## This includes interpolating configuration files in /esg/config/esgcet with
## values from the environment and running "esginitialize -c"
#####

# Make sure the trusted certificates have been updated
@@ -20,88 +18,20 @@ cat /etc/ssl/certs/ca-certificates.crt > /esg/config/esgcet/trust-bundle.pem
cat /esg/certificates/esg-trust-bundle.pem >> /esg/config/esgcet/trust-bundle.pem
export SSL_CERT_FILE=/esg/config/esgcet/trust-bundle.pem

info "Configuring environment"
###
# Check that required variables exist and set some defaults
###
# Database settings
[ -z "$ESGF_DATABASE_HOST" ] && error "ESGF_DATABASE_HOST must be set"
: ${ESGF_DATABASE_NAME:="esgcet"}
: ${ESGF_DATABASE_PORT:="5432"}
: ${ESGF_DATABASE_USER:="dbsuper"}
if [ -z "$ESGF_DATABASE_PASSWORD" ]; then
    [ -z "$ESGF_DATABASE_PASSWORD_FILE" ] && \
        error "ESGF_DATABASE_PASSWORD or ESGF_DATABASE_PASSWORD_FILE must be set"
    [ -f "$ESGF_DATABASE_PASSWORD_FILE" ] || \
        error "ESGF_DATABASE_PASSWORD_FILE does not exist"
    ESGF_DATABASE_PASSWORD="$(cat "$ESGF_DATABASE_PASSWORD_FILE")"
fi
# Hostnames and URLs for components
: ${ESGF_INDEX_NODE_HOSTNAME:="$ESGF_HOSTNAME"}
if [ -z "$ESGF_HESSIAN_URL" ]; then
    [ -z "$ESGF_INDEX_NODE_HOSTNAME" ] && error "ESGF_HESSIAN_URL, ESGF_INDEX_NODE_HOSTNAME or ESGF_HOSTNAME must be set"
    ESGF_HESSIAN_URL="https://${ESGF_INDEX_NODE_HOSTNAME}/esg-search/remote/secure/client-cert/hessian/publishingService"
fi
if [ -z "$ESGF_HESSIAN_METADATA_URL" ]; then
    [ -z "$ESGF_HOSTNAME" ] && error "ESGF_HESSIAN_METADATA_URL or ESGF_HOSTNAME must be set"
    ESGF_HESSIAN_METADATA_URL="https://${ESGF_HOSTNAME}/esgcet/remote/hessian/guest/remoteMetadataService"
fi
: ${ESGF_TDS_HOSTNAME:="$ESGF_HOSTNAME"}
if [ -z "$ESGF_TDS_CATALOG_URL" ]; then
    [ -z "$ESGF_TDS_HOSTNAME" ] && error "ESGF_TDS_CATALOG_URL, ESGF_TDS_HOSTNAME or ESGF_HOSTNAME must be set"
    ESGF_TDS_CATALOG_URL="https://${ESGF_TDS_HOSTNAME}/thredds/catalog/esgcet"
fi
if [ -z "$ESGF_TDS_REINIT_URL" ]; then
    [ -z "$ESGF_TDS_HOSTNAME" ] && error "ESGF_TDS_REINIT_URL, ESGF_TDS_HOSTNAME or ESGF_HOSTNAME must be set"
    ESGF_TDS_REINIT_URL="https://${ESGF_TDS_HOSTNAME}/thredds/admin/debug?Catalogs/recheck"
fi
if [ -z "$ESGF_TDS_REINIT_ERROR_URL" ]; then
    [ -z "$ESGF_TDS_HOSTNAME" ] && error "ESGF_TDS_REINIT_ERROR_URL, ESGF_TDS_HOSTNAME or ESGF_HOSTNAME must be set"
    ESGF_TDS_REINIT_ERROR_URL="https://${ESGF_TDS_HOSTNAME}/thredds/admin/content/logs/catalogInit.log"
fi
[ -z "$ESGF_TDS_USERNAME" ] && error "ESGF_TDS_USERNAME must be set"
if [ -z "$ESGF_TDS_PASSWORD" ]; then
    [ -z "$ESGF_TDS_PASSWORD_FILE" ] && \
        error "ESGF_TDS_PASSWORD or ESGF_TDS_PASSWORD_FILE must be set"
    [ -f "$ESGF_TDS_PASSWORD_FILE" ] || \
        error "ESGF_TDS_PASSWORD_FILE does not exist"
    ESGF_TDS_PASSWORD="$(cat "$ESGF_TDS_PASSWORD_FILE")"
fi

# Make sure we export all the required configs
export ESGF_DATABASE_HOST \
       ESGF_DATABASE_NAME \
       ESGF_DATABASE_PORT \
       ESGF_DATABASE_USER \
       ESGF_DATABASE_PASSWORD \
       ESGF_HESSIAN_URL \
       ESGF_HESSIAN_METADATA_URL \
       ESGF_TDS_CATALOG_URL \
       ESGF_TDS_REINIT_URL \
       ESGF_TDS_REINIT_ERROR_URL \
       ESGF_TDS_USERNAME \
       ESGF_TDS_PASSWORD

info "Using environment:"
env | grep "ESGF_" | grep -v "_PASSWORD"

# Compose the database URL
export DATABASE_URL="postgresql://${ESGF_DATABASE_USER}:$(< "$ESGF_DATABASE_PASSWORD_FILE")@${ESGF_DATABASE_HOST}:${ESGF_DATABASE_PORT}/${ESGF_DATABASE_NAME}"

###
# Interpolate each file ending in .template unless the actual file already exists
# Build the configuration directory
###
info "Interpolating config files"
for src in $(find /esg/config/esgcet -type f -name '*.template'); do
    dest="${src%".template"}"
    [ -f "$dest" ] || envsubst < "$src" > "$dest"
done
/esg/bin/build-config.sh /esg/config/esgcet

# Initialise the schema migration
info "Enabling schema versioning"
DB_URL=${ESGF_DATABASE_PROTOCOL:-postgresql}://${ESGF_DATABASE_USER}:${ESGF_DATABASE_PASSWORD}@${ESGF_DATABASE_HOST}:${ESGF_DATABASE_PORT}/${ESGF_DATABASE_NAME}
if python -m esgcet.schema_migration.manage db_version "${DB_URL}" 1>/dev/null 2>&1; then
if python -m esgcet.schema_migration.manage db_version "${DATABASE_URL}" 1>/dev/null 2>&1; then
    info "  Schema versioning already enabled - skipping"
else
    python -m esgcet.schema_migration.manage version_control "${DB_URL}"
    python -m esgcet.schema_migration.manage version_control "${DATABASE_URL}"
fi

# Run esginitialize
+2 −10
Original line number Diff line number Diff line
#!/bin/bash

set -eo pipefail
set -euo pipefail

function info { echo "[INFO] $1"; }
function error { echo "[ERROR] $1" 1>&2; exit 1; }
. /esg/bin/functions.sh

#####
## This script fetches a certificate from the SLCS for the given username and password
#####

[ -z "$ESGF_SLCS_CERTIFICATE_URL" ] && \
    [ -z "$ESGF_SLCS_URL" ] && \
    [ -z "$ESGF_HOSTNAME" ] && \
    error "ESGF_SLCS_CERTIFICATE_URL, ESGF_SLCS_URL or ESGF_HOSTNAME must be set"
: ${ESGF_SLCS_URL:="https://${ESGF_HOSTNAME}/esgf-slcs"}
: ${ESGF_SLCS_CERTIFICATE_URL:="${ESGF_SLCS_URL}/onlineca/certificate/"}

info "Fetching short-lived certificate from $ESGF_SLCS_CERTIFICATE_URL"

username="$1"