Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
c83c3cf
Merge pull request #10 from TkTech/master
Sep 22, 2017
83c959d
Merge pull request #18 from TkTech/master
iaroslav13 Nov 20, 2017
940dca2
CKAN 2.9 support
smotornyuk Dec 19, 2019
c33e9a7
futurize
smotornyuk Dec 19, 2019
7808057
Revert "futurize"
smotornyuk Dec 19, 2019
8e1f7cd
Format updated files
smotornyuk Dec 19, 2019
8730fa7
futurize
smotornyuk Dec 19, 2019
fe2020a
Restore `storage.py` formatting`
smotornyuk Dec 19, 2019
8e9c264
Update libcloud version. Handle py3 uploads
smotornyuk Jan 15, 2020
89633a4
Import six
smotornyuk Jan 15, 2020
7be388b
Pin libcloud version
smotornyuk Jan 15, 2020
46a8158
Code fixed for libckloud2
smotornyuk Jan 15, 2020
501f4a8
Additional check of upload type
smotornyuk Feb 6, 2020
96150f8
Disable cleanup before multipart upload
smotornyuk May 5, 2020
8436cd5
Remove hardcoded fixes
smotornyuk May 6, 2020
9ab02ae
Initial configuration of pytests
smotornyuk May 6, 2020
be1bc26
Quick tests for mait points
smotornyuk May 8, 2020
c3788a5
Merge branch 'master' into py3
smotornyuk May 14, 2020
f567be5
Allow multipart upload without secure urls
smotornyuk May 26, 2020
46e03c5
Fix bug with resource-links
smotornyuk Jul 23, 2020
51f0d6e
Avoid 500 error when object does not exist
smotornyuk Oct 2, 2020
469e8f8
Handle unicode path
smotornyuk Oct 21, 2020
d0ad841
add the region_name as driver option
espona Dec 1, 2020
fd802f9
Fixes on the JS to visualize the progress bar
espona Dec 3, 2020
8d71db7
fixed resume uploads
espona Dec 3, 2020
4710160
Fixed error message on resume upload
espona Dec 3, 2020
edfcf06
code cleaning
espona Dec 4, 2020
a256d59
Added file size limitation
espona Dec 4, 2020
96ff002
Remove write to log, not working
espona Dec 7, 2020
ab99751
simplified upload
espona Dec 7, 2020
cb16d52
improved error handling
espona Dec 7, 2020
2bdefeb
Improved migration
espona Dec 8, 2020
2ec6206
Fix file migration script
Dec 28, 2020
26d1a2b
Add migration script
smotornyuk Jan 12, 2021
cd6fbf6
Fix progressbar
smotornyuk Jan 12, 2021
5239c9a
Add alembic to git
smotornyuk Jan 19, 2021
0ff667c
Merge remote-tracking branch 'envidat/py3' into py3
smotornyuk Jan 20, 2021
4d430e6
Fix resource_create, resource_patch API calls
ostyhar Feb 1, 2021
330a6b3
Merge pull request #26 from DataShades/SEED-411.4
smotornyuk Feb 1, 2021
88984c9
Increase upload speed for multiline files
smotornyuk Feb 4, 2021
51f5ff2
Standard version check
smotornyuk Feb 10, 2021
93c2e0b
Py2 support of max upload size
smotornyuk Feb 24, 2021
31607bb
Merge remote-tracking branch 'origin/master' into py3
smotornyuk Sep 3, 2021
2274def
Update last_modified
smotornyuk Sep 3, 2021
08858c1
Add ckanapi to requirements
smotornyuk Oct 26, 2021
de394aa
Set content-type for multipart uploads
smotornyuk Nov 2, 2021
94d00d3
Merge branch 'py3' of https://github.com/DataShades/ckanext-cloudstor…
smotornyuk Nov 2, 2021
4b643bd
Multipart allows subclassing of uploader
smotornyuk Feb 10, 2022
b0aa5c0
better multipart removal
smotornyuk Mar 3, 2022
f2cf270
Update test usite
smotornyuk Mar 17, 2022
3469f37
Merge remote-tracking branch 'tk/master' into py3
smotornyuk Mar 17, 2022
ed86723
Use custom uploader all the time
smotornyuk May 2, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ syntax: glob
*.swo
.DS_Store
ckan.egg-info/*
ckanext_cloudstorage.egg-info/*
sandbox/*
dist

Expand All @@ -16,5 +17,6 @@ tmp/*
solr_runtime/*
fl_notes.txt
*.ini
!alembic.ini
.noseids
*~
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ below have been tested:
| Provider | Uploads | Downloads | Secure URLs (private resources) |
| --- | --- | --- | --- |
| Azure | YES | YES | YES (if `azure-storage` is installed) |
| AWS S3 | YES | YES | YES (if `boto` is installed) |
| AWS S3 | YES | YES | YES (if `boto` is installed and `host` key added to `driver_options`) |
| Rackspace | YES | YES | No |

# What are "Secure URLs"?
Expand All @@ -54,7 +54,12 @@ benefits of your CDN/blob storage.

This option also enables multipart uploads, but you need to create database tables
first. Run the following command from the extension folder:
`paster cloudstorage initdb -c /etc/ckan/default/production.ini `

paster cloudstorage initdb -c /etc/ckan/default/production.ini

For CKAN>=2.9 use the following command instead:

ckan -c /etc/ckan/default/production.ini db upgrade -p cloudstorage

With that feature you can use `cloudstorage_clean_multipart` action, which is available
only for sysadmins. After executing, all unfinished multipart uploads, older than 7 days,
Expand Down
184 changes: 23 additions & 161 deletions ckanext/cloudstorage/cli.py
Original file line number Diff line number Diff line change
@@ -1,170 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import os.path
import cgi
import tempfile

from docopt import docopt
from ckan.lib.cli import CkanCommand
import click
import ckanext.cloudstorage.utils as utils

from ckanapi import LocalCKAN
from ckanext.cloudstorage.storage import (
CloudStorage,
ResourceCloudStorage
)
from ckanext.cloudstorage.model import (
create_tables,
drop_tables
)
from ckan.logic import NotFound

USAGE = """ckanext-cloudstorage
@click.group()
def cloudstorage():
    """CloudStorage management commands.
    """
    # Container group only: the sub-commands register themselves through
    # ``@cloudstorage.command`` and the group itself performs no work.

Commands:
- fix-cors Update CORS rules where possible.
- migrate Upload local storage to the remote.
- initdb Reinitalize database tables.

Usage:
cloudstorage fix-cors <domains>... [--c=<config>]
cloudstorage migrate <path_to_storage> [<resource_id>] [--c=<config>]
cloudstorage initdb [--c=<config>]
@cloudstorage.command('fix-cors')
@click.argument('domains', nargs=-1)
def fix_cors(domains):
    """Update CORS rules where possible.
    """
    # ``utils.fix_cors`` hands back a (message, success flag) pair; the flag
    # only selects the colour of the echoed message.
    message, succeeded = utils.fix_cors(domains)
    colour = 'green' if succeeded else 'red'
    click.secho(message, fg=colour)

Options:
-c=<config> The CKAN configuration file.
"""

@cloudstorage.command()
@click.argument('path')
@click.argument('resource', required=False)
def migrate(path, resource):
    """Upload local storage to the remote.
    """
    # ``resource`` is declared with ``required=False``, so it may be omitted
    # on the command line; both values are forwarded to the helper untouched.
    utils.migrate(path, resource)

class FakeFileStorage(cgi.FieldStorage):
    """``cgi.FieldStorage`` stand-in wrapping an already opened file.

    The parent initialiser is not invoked; only the ``file`` and ``filename``
    attributes are populated.
    """

    def __init__(self, fp, filename):
        """Store the open file object ``fp`` and its ``filename``."""
        self.filename = filename
        self.file = fp


class PasterCommand(CkanCommand):
    """Paster command dispatching the ckanext-cloudstorage sub-commands.

    The sub-command is selected by parsing ``self.args`` with docopt against
    the module-level ``USAGE`` text.
    """

    summary = 'ckanext-cloudstorage maintence utilities.'
    usage = USAGE

    def command(self):
        """Load the CKAN config, parse the arguments and run one handler."""
        self._load_config()
        parsed = docopt(USAGE, argv=self.args)

        # Checked in the same order as the commands are listed in USAGE;
        # only the first matching sub-command is executed.
        if parsed['fix-cors']:
            _fix_cors(parsed)
            return
        if parsed['migrate']:
            _migrate(parsed)
            return
        if parsed['initdb']:
            _initdb()


def _migrate(args):
    """Upload resources found in a local storage directory to the remote.

    Walks ``<path_to_storage>``, rebuilds every resource id from the
    directory layout and re-uploads each resource whose ``url_type`` is
    ``upload`` through ``ResourceCloudStorage``. Ids of resources whose
    upload raised are written, one per line, to a temporary file that is
    kept on disk and whose path is printed at the end.

    :param args: parsed docopt arguments. ``<path_to_storage>`` is the local
        storage directory; ``<resource_id>``, when given, restricts the run
        to that single resource.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is structured like so on disk:
    # - storage/
    #   - ...
    #   - resources/
    #     - <3 letter prefix>
    #       - <3 letter prefix>
    #         - <remaining resource_id as filename>
    #         ...
    #       ...
    #     ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        # The id prefix is the names of the two innermost directories.
        # os.path is used instead of splitting on '/' so a root without a
        # separator cannot raise IndexError and the platform separator is
        # respected.
        parent_dir, leaf_dir = os.path.split(root)
        id_prefix = os.path.basename(parent_dir) + leaf_dir

        for file_ in files:
            ckan_res_id = id_prefix + file_
            if single_id and ckan_res_id != single_id:
                continue

            resources[ckan_res_id] = os.path.join(root, file_)

    total = len(resources)
    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` does not).
    for i, (resource_id, file_path) in enumerate(resources.items(), 1):
        print('[{i}/{count}] Working on {id}'.format(
            i=i,
            count=total,
            id=resource_id
        ))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue
        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin,
                resource['url'].split('/')[-1]
            )
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                # Best effort: remember the failure and carry on with the
                # remaining resources.
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e
                ))

    if failed:
        # Text mode plus explicit newlines: the default binary mode rejects
        # str on Python 3, and writelines() adds no separators between ids.
        # The context manager flushes and closes the file before its name is
        # reported; delete=False keeps it on disk.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as log_file:
            log_file.write('\n'.join(failed) + '\n')
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name
        ))


def _fix_cors(args):
    """Set a GET-only CORS rule for ``<domains>`` when the driver allows it.

    The rule is applied through the Azure blob service only when
    ``CloudStorage.can_use_advanced_azure`` is truthy; otherwise a message
    naming the unsupported driver is printed.

    :param args: parsed docopt arguments; ``<domains>`` lists the allowed
        origins.
    """
    storage = CloudStorage()

    if not storage.can_use_advanced_azure:
        print(
            'The driver {driver_name} being used does not currently'
            ' support updating CORS rules through'
            ' cloudstorage.'.format(
                driver_name=storage.driver_name
            )
        )
        return

    # Imported lazily so the Azure SDK is needed only on this code path.
    from azure.storage import blob as azure_blob
    from azure.storage import CorsRule

    options = storage.driver_options
    service = azure_blob.BlockBlobService(options['key'], options['secret'])
    rule = CorsRule(
        allowed_origins=args['<domains>'],
        allowed_methods=['GET']
    )
    service.set_blob_service_properties(cors=[rule])
    print('Done!')


def _initdb():
    """Call ``drop_tables()`` then ``create_tables()`` and report completion."""
    # Order matters: the tables are dropped before being created again.
    for step in (drop_tables, create_tables):
        step()
    print("DB tables are reinitialized")
def get_commands():
    """Return the list of click commands provided by this module."""
    commands = [cloudstorage]
    return commands
56 changes: 56 additions & 0 deletions ckanext/cloudstorage/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
from ckan.lib.cli import CkanCommand
from docopt import docopt

import ckanext.cloudstorage.utils as utils

# Usage text for the paster command: it is assigned to
# ``PasterCommand.usage`` and is also the pattern docopt parses the
# command arguments against in ``PasterCommand.command``.
# NOTE(review): the usage patterns spell the config flag ``--c=<config>``
# while the Options section lists ``-c=<config>`` -- confirm which spelling
# is intended before editing this string.
USAGE = """ckanext-cloudstorage

Commands:
- fix-cors Update CORS rules where possible.
- migrate Upload local storage to the remote.
- initdb Reinitalize database tables.

Usage:
cloudstorage fix-cors <domains>... [--c=<config>]
cloudstorage migrate <path_to_storage> [<resource_id>] [--c=<config>]
cloudstorage initdb [--c=<config>]

Options:
-c=<config> The CKAN configuration file.
"""


class PasterCommand(CkanCommand):
    """Paster command dispatching the ckanext-cloudstorage sub-commands.

    The sub-command is selected by parsing ``self.args`` with docopt against
    the module-level ``USAGE`` text.
    """

    summary = 'ckanext-cloudstorage maintence utilities.'
    usage = USAGE

    def command(self):
        """Load the CKAN config, parse the arguments and run one handler."""
        self._load_config()
        parsed = docopt(USAGE, argv=self.args)

        # Checked in the same order as the commands are listed in USAGE;
        # only the first matching sub-command is executed.
        if parsed['fix-cors']:
            _fix_cors(parsed)
            return
        if parsed['migrate']:
            _migrate(parsed)
            return
        if parsed['initdb']:
            _initdb()


def _migrate(args):
    """Forward the parsed ``migrate`` arguments to ``utils.migrate``.

    :param args: parsed docopt arguments providing ``<path_to_storage>`` and
        ``<resource_id>``.
    """
    utils.migrate(args['<path_to_storage>'], args['<resource_id>'])


def _fix_cors(args):
    """Run ``utils.fix_cors`` for ``<domains>`` and print its message.

    :param args: parsed docopt arguments providing ``<domains>``.
    """
    domains = args['<domains>']
    # The helper returns a pair; only the first element (the message) is
    # used here, the second one is discarded.
    message, _unused = utils.fix_cors(domains)
    print(message)


def _initdb():
    """Call ``utils.initdb()`` and report completion on stdout."""
    utils.initdb()
    done_message = "DB tables are reinitialized"
    print(done_message)
58 changes: 3 additions & 55 deletions ckanext/cloudstorage/controller.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os.path

from pylons import c
from pylons.i18n import _

from ckan import logic, model
from ckan.lib import base, uploader
import ckan.lib.helpers as h
from ckan.lib import base
import ckanext.cloudstorage.utils as utils


# Controller exposing the download endpoint for uploaded resources.
# NOTE(review): this span is taken from a diff view in which the removed
# implementation and the added delegating ``return`` at the bottom appear
# together -- confirm which lines belong to the current file before relying
# on the comments below.
class StorageController(base.BaseController):
def resource_download(self, id, resource_id, filename=None):
# Context passed to the ``resource_show`` action, built from the Pylons
# template context ``c``.
context = {
'model': model,
'session': model.Session,
'user': c.user or c.author,
'auth_user_obj': c.userobj
}

# Look the resource up by ``resource_id``; NotFound aborts with 404 and
# NotAuthorized with 401.
try:
resource = logic.get_action('resource_show')(
context,
{
'id': resource_id
}
)
except logic.NotFound:
base.abort(404, _('Resource not found'))
except logic.NotAuthorized:
# NOTE(review): the message is formatted with ``id`` rather than
# ``resource_id``, and before being passed to ``_`` -- confirm intended.
base.abort(401, _('Unauthorized to read resource {0}'.format(id)))

# This isn't a file upload, so either redirect to the source
# (if available) or error out.
if resource.get('url_type') != 'upload':
url = resource.get('url')
if not url:
base.abort(404, _('No download is available'))
h.redirect_to(url)

if filename is None:
# No filename was provided so we'll try to get one from the url.
filename = os.path.basename(resource['url'])

upload = uploader.get_resource_uploader(resource)

# if the client requests with a Content-Type header (e.g. Text preview)
# we have to add the header to the signature
try:
content_type = getattr(c.pylons.request, "content_type", None)
except AttributeError:
content_type = None
uploaded_url = upload.get_url_from_filename(resource['id'], filename,
content_type=content_type)

# The uploaded file is missing for some reason, such as the
# provider being down.
if uploaded_url is None:
base.abort(404, _('No download is available'))

h.redirect_to(uploaded_url)
# Delegates the download handling to the shared ``utils`` helper, passing
# the three request arguments through unchanged.
return utils.resource_download(id, resource_id, filename)
Loading