diff --git a/datasets/esa-cci-lc/Dockerfile b/datasets/esa-cci-lc/Dockerfile
new file mode 100644
index 00000000..dc4fc425
--- /dev/null
+++ b/datasets/esa-cci-lc/Dockerfile
@@ -0,0 +1,95 @@
+FROM ubuntu:20.04
+
+# Setup timezone info
+ENV TZ=UTC
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# software-properties-common provides add-apt-repository, which is used below.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common && \
+    rm -rf /var/lib/apt/lists/*
+
+# Every apt-get call passes -y: the build has no TTY, so a confirmation prompt
+# (for example from a bare "apt-get autoremove") would abort the layer.
+RUN add-apt-repository -y ppa:ubuntugis/ppa && \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        build-essential \
+        ca-certificates \
+        curl \
+        git \
+        jq \
+        python3-dev \
+        python3-pip \
+        unzip \
+        wget && \
+    apt-get autoremove -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
+
+# See https://github.com/mapbox/rasterio/issues/1289
+ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+
+# Install Mambaforge (conda + mamba) into /opt/conda; Python 3.8 itself is
+# installed by the "mamba install" step below. The installer release is pinned
+# so rebuilds are reproducible, and "curl -f" fails the build on an HTTP error
+# instead of saving the error page as the installer script.
+ARG MAMBAFORGE_VERSION=22.9.0-2
+RUN curl -fsSL -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBAFORGE_VERSION}/Mambaforge-$(uname)-$(uname -m).sh" \
+    && bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
+    && rm -f "Mambaforge-$(uname)-$(uname -m).sh"
+
+# key=value ENV syntax. $LD_LIBRARY_PATH is not appended because it is unset in
+# the base image: the trailing ':' would add an empty entry, which the dynamic
+# loader resolves to the current working directory.
+ENV PATH=/opt/conda/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/conda/lib/
+
+RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5 && \
+    conda clean --all --yes
+
+RUN python -m pip install --no-cache-dir --upgrade pip
+
+# Install common packages
+COPY requirements-task-base.txt /tmp/requirements.txt
+RUN python -m pip install --no-cache-dir --no-build-isolation -r /tmp/requirements.txt
+
+#
+# Copy and install packages
+#
+# Each package has its own COPY + RUN pair so that editing one package only
+# rebuilds the layers from that package onwards.
+
+COPY pctasks/core /opt/src/pctasks/core
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/core
+
+COPY pctasks/cli /opt/src/pctasks/cli
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/cli
+
+COPY pctasks/task /opt/src/pctasks/task
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/task
+
+COPY pctasks/client /opt/src/pctasks/client
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/client
+
+COPY pctasks/ingest /opt/src/pctasks/ingest
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/ingest
+
+COPY pctasks/dataset /opt/src/pctasks/dataset
+RUN python -m pip install --no-cache-dir /opt/src/pctasks/dataset
+
+COPY ./datasets/esa-cci-lc/requirements.txt /opt/src/datasets/esa-cci-lc/requirements.txt
+RUN python -m pip install --no-cache-dir -r /opt/src/datasets/esa-cci-lc/requirements.txt
+
+# Setup Python Path to allow import of test modules.
+# $PYTHONPATH is not appended: it is unset in the base image, so it would only
+# add an empty trailing entry.
+ENV PYTHONPATH=/opt/src
+
+WORKDIR /opt/src
diff --git a/datasets/esa-cci-lc/README.md b/datasets/esa-cci-lc/README.md
new file mode 100644
index 00000000..de770eef
--- /dev/null
+++ b/datasets/esa-cci-lc/README.md
@@ -0,0 +1,9 @@
+# ESA CCI
+
+## Docker container
+
+Build the image from the repository root; the build context must contain `pctasks/` and `requirements-task-base.txt`:
+
+```shell
+az acr build -r {the registry} --subscription {the subscription} -t pctasks-esa-cci-lc:latest -f datasets/esa-cci-lc/Dockerfile .
+```
diff --git a/datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/template.json b/datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/template.json
index 3884b4ba..a48ceab0 100644
--- a/datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/template.json
+++ b/datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/template.json
@@ -105,7 +105,6 @@
     {
       "name": "Microsoft",
       "roles": [
-        "processor",
         "host"
       ],
       "url": "https://planetarycomputer.microsoft.com"
diff --git a/datasets/esa-cci-lc/dataset.yaml b/datasets/esa-cci-lc/dataset.yaml
index f60bacd6..13a37f32 100644
--- a/datasets/esa-cci-lc/dataset.yaml
+++ b/datasets/esa-cci-lc/dataset.yaml
@@ -1,12 +1,13 @@
 id: esa_cci_lc
-image: ${{ args.registry }}/pctasks-task-base:latest
+image: ${{ args.registry }}/pctasks-esa-cci-lc:latest
 
 args:
 - registry
 
 code:
   src: ${{ local.path(./esa_cci_lc.py) }}
-  requirements: ${{ local.path(./requirements.txt) }}
+  # included in the container image
+  # requirements: ${{ local.path(./requirements.txt) }}
 
 environment:
   AZURE_TENANT_ID: ${{ secrets.task-tenant-id }}
@@ -23,8 +24,7 @@ collections:
           options:
             name_starts_with: netcdf/
             extension: [.nc]
-            chunk_length: 2  # for testing; set to 10 for production
-            limit: 2  # for testing; remove for production
+            chunk_length: 10
 
     chunk_storage:
       uri: blob://landcoverdata/esa-cci-lc-etl-data/esa-cci-lc-netcdf/
@@ -37,7 +37,6 @@ collections:
           options:
             name_starts_with: netcdf/
             extension: [.nc]
-            chunk_length: 1  # for testing; set to 10 for production
-            limit: 2  # for testing; remove for production
+            chunk_length: 10
     chunk_storage:
       uri: blob://landcoverdata/esa-cci-lc-etl-data/esa-cci-lc/