diff --git a/ocrd/ocrd/resource_list.yml b/ocrd/ocrd/resource_list.yml index fcdfa63398..4840fa3296 100644 --- a/ocrd/ocrd/resource_list.yml +++ b/ocrd/ocrd/resource_list.yml @@ -133,19 +133,23 @@ ocrd-anybaseocr-block-segmentation: description: block segmentation model for anybaseocr size: 256139800 ocrd-anybaseocr-layout-analysis: - - url: https://ocr-d-repo.scc.kit.edu/models/dfki/layoutAnalysis/structure_analysis.h5 - name: structure_analysis.h5 + - url: https://ocr-d.kba.cloud/structure_analysis.tar.gz + name: structure_analysis description: structure analysis model for anybaseocr - size: 31477056 + type: tarball + path_in_archive: 'structure_analysis' + size: 29002514 - url: https://ocr-d-repo.scc.kit.edu/models/dfki/layoutAnalysis/mapping_densenet.pickle name: mapping_densenet.pickle description: mapping model for anybaseocr size: 374 ocrd-anybaseocr-tiseg: - - url: https://ocr-d-repo.scc.kit.edu/models/dfki/tiseg/seg_model.hdf5 - name: seg_model.hdf5 + - url: https://ocr-d.kba.cloud/seg_model.tar.gz + name: seg_model description: text image segmentation model for anybaseocr - size: 66080688 + type: tarball + path_in_archive: 'seg_model' + size: 61388872 ocrd-kraken-segment: - url: https://github.com/mittagessen/kraken/raw/master/kraken/blla.mlmodel description: Pretrained baseline segmentation model diff --git a/ocrd/ocrd/resource_manager.py b/ocrd/ocrd/resource_manager.py index 06832e0fd2..4720cb54ef 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/ocrd/ocrd/resource_manager.py @@ -249,17 +249,17 @@ def download( else: self._copy_impl(url, fpath, progress_cb) elif resource_type == 'tarball': - with pushd_popd(tempdir=True): + with pushd_popd(tempdir=True) as tempdir: if is_url: self._download_impl(url, 'download.tar.xx', progress_cb, size) else: self._copy_impl(url, 'download.tar.xx', progress_cb) Path('out').mkdir() with pushd_popd('out'): - log.info("Extracting tarball") + log.info("Extracting tarball to %s/out" % tempdir) with open_tarfile('../download.tar.xx', 'r:*') as tar: tar.extractall() - log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath)) + log.info("Copying '%s' from extracted tarball %s/out to %s" % (path_in_archive, tempdir, fpath)) copytree(path_in_archive, str(fpath)) # TODO # elif resource_type == 'github-dir':