diff --git a/README.md b/README.md index a13693d..94a7257 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,69 @@ # docker-drag -This repository contains Python scripts for interacting with Docker Hub or other registries, without needing the Docker client itself. +This repository contains a Python script for interacting with Docker Hub or other registries, without needing the Docker client itself. It relies on the Docker registry [HTTPS API v2](https://docs.docker.com/registry/spec/api/). -## Pull a Docker image in HTTPS +## Usage -`python docker_pull.py hello-world` +### Basic Usage -`python docker_pull.py mysql/mysql-server:8.0` +To pull a standard Docker image, provide the image name as an argument. The script will download the image and package it as a `.tar` file. -`python docker_pull.py mcr.microsoft.com/mssql-tools` +```shell +# Pull the hello-world image +python docker_pull.py hello-world -`python docker_pull.py consul@sha256:6ba4bfe1449ad8ac5a76cb29b6c3ff54489477a23786afb61ae30fb3b1ac0ae9` +# Pull a specific version of an image +python docker_pull.py mysql/mysql-server:8.0 +python docker_pull.py protopie/enterprise-onpremises:api-15.8.3 -After the image has been downloaded (`.tar` file), you can then import it and use it with the following docker commands: +# Pull from a different registry +python docker_pull.py mcr.microsoft.com/mssql-tools +# Pull an image by its digest +python docker_pull.py consul@sha256:6ba4bfe1449ad8ac5a76cb29b6c3ff54489477a23786afb61ae30fb3b1ac0ae9 ``` -docker load -i library_ubuntu.tar -docker run -it ubuntu + +After the image has been downloaded, you can load it into Docker using `docker load`: +```shell +docker load -i <image_name>.tar +docker run -it <image_name> +``` + +### Handling Multi-Architecture Images + +Many modern Docker images support multiple CPU architectures (e.g., `amd64`, `arm64`). This script allows you to select which architecture to pull. + +**1. 
List Available Platforms** + +If you run the script on a multi-architecture image without specifying a platform, it will list all available platforms and the corresponding `--platform` argument to use. + +```shell +python docker_pull.py hello-world ``` +**Example Output:** +``` +[+] This is a multi-architecture image. Please specify a platform using the --platform argument. +[i] Available platforms are: + --platform linux/amd64 # os: linux, architecture: amd64 (digest: sha256:...) + --platform linux/arm/v5 # os: linux, architecture: arm, variant: v5 (digest: sha256:...) + --platform linux/arm/v7 # os: linux, architecture: arm, variant: v7 (digest: sha256:...) + --platform linux/arm64/v8 # os: linux, architecture: arm64, variant: v8 (digest: sha256:...) + --platform windows/amd64 # os: windows, architecture: amd64, os.version: ... (digest: sha256:...) +``` + +**2. Pull a Specific Platform** + +Use the `--platform` flag with the desired platform string from the list above. + +```shell +# Pull the linux/arm64 version of hello-world +python docker_pull.py hello-world --platform linux/arm64/v8 +``` + +The script will then download the image for the specified architecture. +

@@ -31,4 +75,4 @@ docker run -it ubuntu ## Well known bugs 2 open bugs which shouldn't affect the efficiency of the script nor the pulled image: - Unicode content (for example `\u003c`) gets automatically decoded by `json.loads()` which differs from the original Docker client behaviour (`\u003c` should not be decoded when creating the TAR file). This is due to the json Python library automatically converting string to unicode. -- Fake layers ID are not calculated the same way than Docker client does (I don't know yet how layer hashes are generated, but it seems deterministic and based on the client) +- Fake layers ID are not calculated the same way than Docker client does (I don't know yet how layer hashes are generated, but it seems deterministic and based on the client) \ No newline at end of file diff --git a/docker_pull.py b/docker_pull.py index 8484f3f..faa9c83 100644 --- a/docker_pull.py +++ b/docker_pull.py @@ -8,16 +8,19 @@ import requests import tarfile import urllib3 +import argparse urllib3.disable_warnings() -if len(sys.argv) != 2 : - print('Usage:\n\tdocker_pull.py [registry/][repository/]image[:tag|@digest]\n') - exit(1) +parser = argparse.ArgumentParser(description='Pull Docker images without a Docker daemon by directly interacting with the registry API.') +parser.add_argument('image', help='The Docker image to pull, e.g., "ubuntu:latest" or "hello-world@"') +parser.add_argument('--platform', help='Set platform to pull a specific architecture, e.g., "linux/amd64" or "arm64"') +args = parser.parse_args() + # Look for the Docker image to download repo = 'library' tag = 'latest' -imgparts = sys.argv[1].split('/') +imgparts = args.image.split('/') try: img,tag = imgparts[-1].split('@') except ValueError: @@ -69,22 +72,119 @@ def progress_bar(ublob, nb_traits): sys.stdout.flush() # Fetch manifest v2 and get image layer digests -auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json') +# First, try to get a manifest list or a 
single manifest +auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json') resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False) -if (resp.status_code != 200): - print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(repository, resp.status_code)) - print(resp.content) - auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json') - resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False) - if (resp.status_code == 200): - print('[+] Manifests found for this tag (use the @digest format to pull the corresponding image):') - manifests = resp.json()['manifests'] - for manifest in manifests: - for key, value in manifest["platform"].items(): - sys.stdout.write('{}: {}, '.format(key, value)) - print('digest: {}'.format(manifest["digest"])) - exit(1) -layers = resp.json()['layers'] + +if resp.status_code != 200: + print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(repository, resp.status_code)) + print(resp.content) + exit(1) + +manifest_data = resp.json() + +# Handle manifest list (multi-architecture) +if 'manifests' in manifest_data: + manifests = manifest_data['manifests'] + + # If --platform is specified, find the matching digest + if args.platform: + # Platform string parsing for "os/arch/variant", "os/arch", or "arch" + parts = args.platform.split('/') + req_os, req_arch, req_variant = None, None, None + if len(parts) == 1: + req_arch = parts[0] + elif len(parts) == 2: + req_os, req_arch = parts + elif len(parts) >= 3: + req_os, req_arch, req_variant = parts[:3] + + # If os is not specified, default to linux + if req_os is None: + req_os = 'linux' + + matching_manifests = [] + for manifest in manifests: + plat = manifest.get('platform', {}) + arch = plat.get('architecture') + manifest_os = plat.get('os') + variant = 
plat.get('variant') + + # Match required fields. A specified field must match. + if manifest_os != req_os or arch != req_arch: + continue + + # If variant is specified, it must match. If not specified, we accept any variant. + if req_variant is not None and req_variant != variant: + continue + + matching_manifests.append(manifest) + + if len(matching_manifests) == 0: + print(f'[-] Could not find a manifest for platform: {args.platform}') + print('[i] Available platforms are:') + for manifest in manifests: + plat = manifest.get("platform", {}) + platform_info = ', '.join([f'{key}: {value}' for key, value in plat.items()]) + + # Construct the platform string for the --platform argument + platform_arg_parts = [] + if plat.get('os'): platform_arg_parts.append(plat.get('os')) + if plat.get('architecture'): platform_arg_parts.append(plat.get('architecture')) + if plat.get('variant'): platform_arg_parts.append(plat.get('variant')) + platform_arg_str = '/'.join(platform_arg_parts) + + print(f' --platform {platform_arg_str:<30} # {platform_info} (digest: {manifest["digest"]})') + exit(1) + + elif len(matching_manifests) > 1: + print(f'[!] 
Ambiguous platform: --platform {args.platform} matches multiple images.') + print(f'[i] Please select one by re-running the command with its specific digest, e.g.:') + print(f' python docker_pull.py {args.image}@') + print('[i] Conflicting manifests are:') + for manifest in matching_manifests: + platform_info = ', '.join([f'{key}: {value}' for key, value in manifest.get("platform", {}).items()]) + print(f' - {platform_info} (digest: {manifest["digest"]})') + exit(1) + + else: # Exactly one match + digest = matching_manifests[0]['digest'] + print(f'[+] Found unique digest for platform {args.platform}: {digest}') + + # Re-fetch the specific manifest using its digest + auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json') + resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, digest), headers=auth_head, verify=False) + if resp.status_code != 200: + print(f'[-] Failed to fetch manifest for digest {digest} [HTTP {resp.status_code}]') + print(resp.content) + exit(1) + manifest_data = resp.json() + + # If --platform is NOT specified for a multi-arch image, print list and exit + else: + print('[+] This is a multi-architecture image. 
Please specify a platform using the --platform argument.') + print('[i] Available platforms are:') + for manifest in manifests: + plat = manifest.get("platform", {}) + platform_info = ', '.join([f'{key}: {value}' for key, value in plat.items()]) + + # Construct the platform string for the --platform argument + platform_arg_parts = [] + if plat.get('os'): platform_arg_parts.append(plat.get('os')) + if plat.get('architecture'): platform_arg_parts.append(plat.get('architecture')) + if plat.get('variant'): platform_arg_parts.append(plat.get('variant')) + platform_arg_str = '/'.join(platform_arg_parts) + + print(f' --platform {platform_arg_str:<30} # {platform_info} (digest: {manifest["digest"]})') + exit(1) + +# At this point, manifest_data should be a single-architecture manifest +if 'layers' not in manifest_data: + print(f'[-] Unexpected manifest format for {repository}. Expected a single manifest but got something else.') + print(resp.text) + exit(1) + +layers = manifest_data['layers'] # Create tmp folder that will hold the image imgdir = 'tmp_{}_{}'.format(img, tag.replace(':', '@'))