diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 4c9ac40d7..6a20bd6ac 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -62,8 +62,8 @@ jobs: python -m pip install setuptools==80.1.0 poetry install --without=docs --with=dev - - name: Run flake8 - run: poetry run flake8 . + - name: Run ruff + run: poetry run ruff check . - name: Run unit tests run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..0d41750f5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +# .pre-commit-config.yaml + +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.10 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 630fd717a..9da30efce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,6 +63,7 @@ ARG GIT_COMMIT= ENV GIT_COMMIT=${GIT_COMMIT} RUN poetry install --without=docs +RUN pip install pre-commit EXPOSE 7778 diff --git a/README.md b/README.md index c8bb4b69b..8193390e7 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,14 @@ invoke install --develop invoke test ``` +### Contributing + +Before making your first commit, run the command below to install the pre-commit hooks, which apply ruff autoformatting so that your changes follow the same code style: + +```bash +pre-commit install +``` + ### Known issues - **Updated, 2018-03-01:** *MFR has been updated to work with setuptools==37.0.0 as of MFR release v0.25. The following issue should not happen for new installs, but may occur if you downgrade to an older version.* Running `invoke install -d` with setuptools v31 or greater can break MFR. The symptom error message is: `"AttributeError: module 'mfr' has no attribute '__version__'".` If you encounter this, you will need to remove the file `mfr-nspkg.pth` from your virtualenv directory, run `pip install setuptools==30.4.0`, then re-run `invoke install -d`. 
diff --git a/docs/_themes/flask_theme_support.py b/docs/_themes/flask_theme_support.py index c63811dd8..e073bd1f0 100755 --- a/docs/_themes/flask_theme_support.py +++ b/docs/_themes/flask_theme_support.py @@ -1,7 +1,19 @@ # flasky extensions. flasky pygments style based on tango style from pygments.style import Style -from pygments.token import Keyword, Name, Comment, String, Error, \ - Number, Operator, Generic, Whitespace, Punctuation, Other, Literal +from pygments.token import ( + Keyword, + Name, + Comment, + String, + Error, + Number, + Operator, + Generic, + Whitespace, + Punctuation, + Other, + Literal, +) class FlaskyStyle(Style): @@ -10,78 +22,71 @@ class FlaskyStyle(Style): styles = { # No corresponding class for the following: - #Text: "", # class: '' - Whitespace: "underline #f8f8f8", # class: 'w' - Error: "#a40000 border:#ef2929", # class: 'err' - Other: "#000000", # class 'x' - - Comment: "italic #8f5902", # class: 'c' - Comment.Preproc: "noitalic", # class: 'cp' - - Keyword: "bold #004461", # class: 'k' - Keyword.Constant: "bold #004461", # class: 'kc' - Keyword.Declaration: "bold #004461", # class: 'kd' - Keyword.Namespace: "bold #004461", # class: 'kn' - Keyword.Pseudo: "bold #004461", # class: 'kp' - Keyword.Reserved: "bold #004461", # class: 'kr' - Keyword.Type: "bold #004461", # class: 'kt' - - Operator: "#582800", # class: 'o' - Operator.Word: "bold #004461", # class: 'ow' - like keywords - - Punctuation: "bold #000000", # class: 'p' - + # Text: "", # class: '' + Whitespace: "underline #f8f8f8", # class: 'w' + Error: "#a40000 border:#ef2929", # class: 'err' + Other: "#000000", # class 'x' + Comment: "italic #8f5902", # class: 'c' + Comment.Preproc: "noitalic", # class: 'cp' + Keyword: "bold #004461", # class: 'k' + Keyword.Constant: "bold #004461", # class: 'kc' + Keyword.Declaration: "bold #004461", # class: 'kd' + Keyword.Namespace: "bold #004461", # class: 'kn' + Keyword.Pseudo: "bold #004461", # class: 'kp' + Keyword.Reserved: "bold 
#004461", # class: 'kr' + Keyword.Type: "bold #004461", # class: 'kt' + Operator: "#582800", # class: 'o' + Operator.Word: "bold #004461", # class: 'ow' - like keywords + Punctuation: "bold #000000", # class: 'p' # because special names such as Name.Class, Name.Function, etc. # are not recognized as such later in the parsing, we choose them # to look the same as ordinary variables. - Name: "#000000", # class: 'n' - Name.Attribute: "#c4a000", # class: 'na' - to be revised - Name.Builtin: "#004461", # class: 'nb' - Name.Builtin.Pseudo: "#3465a4", # class: 'bp' - Name.Class: "#000000", # class: 'nc' - to be revised - Name.Constant: "#000000", # class: 'no' - to be revised - Name.Decorator: "#888", # class: 'nd' - to be revised - Name.Entity: "#ce5c00", # class: 'ni' - Name.Exception: "bold #cc0000", # class: 'ne' - Name.Function: "#000000", # class: 'nf' - Name.Property: "#000000", # class: 'py' - Name.Label: "#f57900", # class: 'nl' - Name.Namespace: "#000000", # class: 'nn' - to be revised - Name.Other: "#000000", # class: 'nx' - Name.Tag: "bold #004461", # class: 'nt' - like a keyword - Name.Variable: "#000000", # class: 'nv' - to be revised - Name.Variable.Class: "#000000", # class: 'vc' - to be revised - Name.Variable.Global: "#000000", # class: 'vg' - to be revised - Name.Variable.Instance: "#000000", # class: 'vi' - to be revised - - Number: "#990000", # class: 'm' - - Literal: "#000000", # class: 'l' - Literal.Date: "#000000", # class: 'ld' + Name: "#000000", # class: 'n' + Name.Attribute: "#c4a000", # class: 'na' - to be revised + Name.Builtin: "#004461", # class: 'nb' + Name.Builtin.Pseudo: "#3465a4", # class: 'bp' + Name.Class: "#000000", # class: 'nc' - to be revised + Name.Constant: "#000000", # class: 'no' - to be revised + Name.Decorator: "#888", # class: 'nd' - to be revised + Name.Entity: "#ce5c00", # class: 'ni' + Name.Exception: "bold #cc0000", # class: 'ne' + Name.Function: "#000000", # class: 'nf' + Name.Property: "#000000", # class: 'py' + 
Name.Label: "#f57900", # class: 'nl' + Name.Namespace: "#000000", # class: 'nn' - to be revised + Name.Other: "#000000", # class: 'nx' + Name.Tag: "bold #004461", # class: 'nt' - like a keyword + Name.Variable: "#000000", # class: 'nv' - to be revised + Name.Variable.Class: "#000000", # class: 'vc' - to be revised + Name.Variable.Global: "#000000", # class: 'vg' - to be revised + Name.Variable.Instance: "#000000", # class: 'vi' - to be revised + Number: "#990000", # class: 'm' + Literal: "#000000", # class: 'l' + Literal.Date: "#000000", # class: 'ld' + String: "#4e9a06", # class: 's' + String.Backtick: "#4e9a06", # class: 'sb' + String.Char: "#4e9a06", # class: 'sc' + String.Doc: "italic #8f5902", # class: 'sd' - like a comment + String.Double: "#4e9a06", # class: 's2' + String.Escape: "#4e9a06", # class: 'se' + String.Heredoc: "#4e9a06", # class: 'sh' + String.Interpol: "#4e9a06", # class: 'si' + String.Other: "#4e9a06", # class: 'sx' + String.Regex: "#4e9a06", # class: 'sr' + String.Single: "#4e9a06", # class: 's1' + String.Symbol: "#4e9a06", # class: 'ss' + Generic: "#000000", # class: 'g' + Generic.Deleted: "#a40000", # class: 'gd' + Generic.Emph: "italic #000000", # class: 'ge' + Generic.Error: "#ef2929", # class: 'gr' + Generic.Heading: "bold #000080", # class: 'gh' + Generic.Inserted: "#00A000", # class: 'gi' + Generic.Output: "#888", # class: 'go' + Generic.Prompt: "#745334", # class: 'gp' + Generic.Strong: "bold #000000", # class: 'gs' + Generic.Subheading: "bold #800080", # class: 'gu' + Generic.Traceback: "bold #a40000", # class: 'gt' + } - String: "#4e9a06", # class: 's' - String.Backtick: "#4e9a06", # class: 'sb' - String.Char: "#4e9a06", # class: 'sc' - String.Doc: "italic #8f5902", # class: 'sd' - like a comment - String.Double: "#4e9a06", # class: 's2' - String.Escape: "#4e9a06", # class: 'se' - String.Heredoc: "#4e9a06", # class: 'sh' - String.Interpol: "#4e9a06", # class: 'si' - String.Other: "#4e9a06", # class: 'sx' - String.Regex: "#4e9a06", # 
class: 'sr' - String.Single: "#4e9a06", # class: 's1' - String.Symbol: "#4e9a06", # class: 'ss' - Generic: "#000000", # class: 'g' - Generic.Deleted: "#a40000", # class: 'gd' - Generic.Emph: "italic #000000", # class: 'ge' - Generic.Error: "#ef2929", # class: 'gr' - Generic.Heading: "bold #000080", # class: 'gh' - Generic.Inserted: "#00A000", # class: 'gi' - Generic.Output: "#888", # class: 'go' - Generic.Prompt: "#745334", # class: 'gp' - Generic.Strong: "bold #000000", # class: 'gs' - Generic.Subheading: "bold #800080", # class: 'gu' - Generic.Traceback: "bold #a40000", # class: 'gt' - } -# flake8: noqa \ No newline at end of file +# flake8: noqa diff --git a/docs/conf.py b/docs/conf.py index 6d31f10bc..f31c6d74b 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,43 +10,43 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os +import sys from mfr.version import __version__ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) sys.path.append(os.path.abspath("_themes")) # -- General configuration ----------------------------------------------------- -autodoc_default_flags = ['members', 'undoc-members', 'show-inheritance'] +autodoc_default_flags = ["members", "undoc-members", "show-inheritance"] # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'mfr' -copyright = '2023, Center For Open Science' +project = "mfr" +copyright = "2023, Center For Open Science" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -57,71 +57,71 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. 
-#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'flask_theme_support.FlaskyStyle' +pygments_style = "flask_theme_support.FlaskyStyle" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -#html_theme = 'kr' +# html_theme = 'kr' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['_themes'] +html_theme_path = ["_themes"] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -130,58 +130,58 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
-#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. html_sidebars = { - 'index': ['side-primary.html', 'searchbox.html'], - '**': ['side-secondary.html', 'localtoc.html', - 'relations.html', 'searchbox.html'] + "index": ["side-primary.html", "searchbox.html"], + "**": ["side-secondary.html", "localtoc.html", "relations.html", "searchbox.html"], } # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. 
-htmlhelp_basename = 'mfrdoc' +htmlhelp_basename = "mfrdoc" -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = os.environ.get("READTHEDOCS", None) == "True" # On RTD we can't import sphinx_rtd_theme, but it will be applied by # default anyway. This block will use the same theme when building locally # as on RTD. if not on_rtd: import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' + + html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/mfr/core/exceptions.py b/mfr/core/exceptions.py index bfc9fc953..892c7373d 100644 --- a/mfr/core/exceptions.py +++ b/mfr/core/exceptions.py @@ -4,27 +4,26 @@ class PluginError(waterbutler.core.exceptions.PluginError): - """The MFR related errors raised from a plugin should inherit from PluginError - """ + """The MFR related errors raised from a plugin should inherit from PluginError""" - __TYPE = 'plugin' + __TYPE = "plugin" def __init__(self, message, *args, code=500, **kwargs): super().__init__(message, code) self.attr_stack = [ - ['error', {'message': self.message, 'code': self.code}], + ["error", {"message": self.message, "code": self.code}], [self.__TYPE, {}], ] def as_html(self): - return ''' + return """
This text and the text below is only presented because IE consumes error messages below 512 bytes
Want to help save science? Want to get paid to develop free, open source software? Check out our openings!
- '''.format(self.message) + """.format(self.message) @staticmethod def _format_original_exception(exc): @@ -32,10 +31,10 @@ def _format_original_exception(exc): error instead. This method will take in an external error class and format it for consistent representation in the error metrics. """ - formatted_exc = {'class': '', 'message': ''} + formatted_exc = {"class": "", "message": ""} if exc is not None: - formatted_exc['class'] = exc.__class__.__name__ - formatted_exc['message'] = str(exc) + formatted_exc["class"] = exc.__class__.__name__ + formatted_exc["message"] = str(exc) return formatted_exc @@ -44,12 +43,12 @@ class ExtensionError(PluginError): ExtensionError """ - __TYPE = 'extension' + __TYPE = "extension" - def __init__(self, message, *args, extension: str = '', **kwargs): + def __init__(self, message, *args, extension: str = "", **kwargs): super().__init__(message, *args, **kwargs) self.extension = extension - self.attr_stack.append([self.__TYPE, {'extension': self.extension}]) + self.attr_stack.append([self.__TYPE, {"extension": self.extension}]) class RendererError(ExtensionError): @@ -57,12 +56,12 @@ class RendererError(ExtensionError): should inherit from RendererError """ - __TYPE = 'renderer' + __TYPE = "renderer" - def __init__(self, message, *args, renderer_class: str = '', **kwargs): + def __init__(self, message, *args, renderer_class: str = "", **kwargs): super().__init__(message, *args, **kwargs) self.renderer_class = renderer_class - self.attr_stack.append([self.__TYPE, {'class': self.renderer_class}]) + self.attr_stack.append([self.__TYPE, {"class": self.renderer_class}]) class ExporterError(ExtensionError): @@ -70,12 +69,12 @@ class ExporterError(ExtensionError): should inherit from ExporterError """ - __TYPE = 'exporter' + __TYPE = "exporter" - def __init__(self, message, *args, exporter_class: str = '', **kwargs): + def __init__(self, message, *args, exporter_class: str = "", **kwargs): super().__init__(message, *args, **kwargs) 
self.exporter_class = exporter_class - self.attr_stack.append([self.__TYPE, {'exporter_class': self.exporter_class}]) + self.attr_stack.append([self.__TYPE, {"exporter_class": self.exporter_class}]) class SubprocessError(ExporterError): @@ -83,21 +82,35 @@ class SubprocessError(ExporterError): should inherit from SubprocessError """ - __TYPE = 'subprocess' - - def __init__(self, message, *args, code: int = 500, process: str = '', cmd: str = '', - returncode: int = None, path: str = '', **kwargs): + __TYPE = "subprocess" + + def __init__( + self, + message, + *args, + code: int = 500, + process: str = "", + cmd: str = "", + returncode: int = None, + path: str = "", + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.process = process self.cmd = cmd self.return_code = returncode self.path = path - self.attr_stack.append([self.__TYPE, { - 'process': self.process, - 'cmd': self.cmd, - 'returncode': self.return_code, - 'path': self.path, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "process": self.process, + "cmd": self.cmd, + "returncode": self.return_code, + "path": self.path, + }, + ] + ) class ProviderError(PluginError): @@ -105,12 +118,12 @@ class ProviderError(PluginError): ProviderError """ - __TYPE = 'provider' + __TYPE = "provider" - def __init__(self, message, *args, provider: str = '', **kwargs): + def __init__(self, message, *args, provider: str = "", **kwargs): super().__init__(message, *args, **kwargs) self.provider = provider - self.attr_stack.append([self.__TYPE, {'provider': self.provider}]) + self.attr_stack.append([self.__TYPE, {"provider": self.provider}]) class DownloadError(ProviderError): @@ -118,16 +131,20 @@ class DownloadError(ProviderError): should inherit from DownloadError """ - __TYPE = 'download' + __TYPE = "download" - def __init__(self, message, *args, download_url: str = '', response: str = '', **kwargs): + def __init__( + self, message, *args, download_url: str = "", response: str = "", **kwargs + ): 
super().__init__(message, *args, **kwargs) self.download_url = download_url self.response = response - self.attr_stack.append([self.__TYPE, { - 'download_url': self.download_url, - 'response': self.response - }]) + self.attr_stack.append( + [ + self.__TYPE, + {"download_url": self.download_url, "response": self.response}, + ] + ) class MetadataError(ProviderError): @@ -135,48 +152,77 @@ class MetadataError(ProviderError): should inherit from MetadataError """ - __TYPE = 'metadata' + __TYPE = "metadata" - def __init__(self, message, *args, metadata_url: str = '', response: str = '', **kwargs): + def __init__( + self, message, *args, metadata_url: str = "", response: str = "", **kwargs + ): super().__init__(message, *args, **kwargs) self.metadata_url = metadata_url self.response = response - self.attr_stack.append([self.__TYPE, { - 'metadata_url': self.metadata_url, - 'response': self.response - }]) + self.attr_stack.append( + [ + self.__TYPE, + {"metadata_url": self.metadata_url, "response": self.response}, + ] + ) + class CorruptedError(RendererError): + __TYPE = "corrupted" - __TYPE = 'corrupted' + def __init__(self, *args, renderer_class: str = "", **kwargs): + super().__init__( + "File is corrupted, impossible to render, please check it's integrity", + *args, + renderer_class, + **kwargs, + ) - def __init__(self, *args, renderer_class: str = '', **kwargs): - super().__init__("File is corrupted, impossible to render, please check it's integrity", *args, renderer_class, **kwargs) class TooBigToRenderError(ProviderError): """If the user tries to render a file larger than a server specified maximum, throw a TooBigToRenderError. 
""" - __TYPE = 'too_big_to_render' - - def __init__(self, message, *args, requested_size: int = None, maximum_size: int = None, - code: int = 400, **kwargs): + __TYPE = "too_big_to_render" + + def __init__( + self, + message, + *args, + requested_size: int = None, + maximum_size: int = None, + code: int = 400, + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.requested_size = requested_size self.maximum_size = maximum_size - self.attr_stack.append([self.__TYPE, { - 'requested_size': self.requested_size, - 'maximum_size': self.maximum_size, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "requested_size": self.requested_size, + "maximum_size": self.maximum_size, + }, + ] + ) class DriverManagerError(PluginError): - - __TYPE = 'drivermanager' - - def __init__(self, message, *args, namespace: str = '', name: str = '', invoke_on_load: bool = None, - invoke_args: dict = None, **kwargs): + __TYPE = "drivermanager" + + def __init__( + self, + message, + *args, + namespace: str = "", + name: str = "", + invoke_on_load: bool = None, + invoke_args: dict = None, + **kwargs, + ): super().__init__(message, *args, **kwargs) self.namespace = namespace @@ -184,12 +230,17 @@ def __init__(self, message, *args, namespace: str = '', name: str = '', invoke_o self.invoke_on_load = invoke_on_load self.invoke_args = invoke_args or {} - self.attr_stack.append([self.__TYPE, { - 'namespace': self.namespace, - 'name': self.name, - 'invoke_on_load': self.invoke_on_load, - 'invoke_args': self.invoke_args, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "namespace": self.namespace, + "name": self.name, + "invoke_on_load": self.invoke_on_load, + "invoke_args": self.invoke_args, + }, + ] + ) class MakeProviderError(DriverManagerError): @@ -206,14 +257,14 @@ class UnsupportedExtensionError(DriverManagerError): trips this) and includes a handler_type argsument """ - __TYPE = 'unsupported_extension' + __TYPE = "unsupported_extension" - def __init__(self, 
*args, code: int = 400, handler_type: str = '', **kwargs): + def __init__(self, *args, code: int = 400, handler_type: str = "", **kwargs): super().__init__(*args, code=code, **kwargs) self.handler_type = handler_type - self.attr_stack.append([self.__TYPE, {'handler_type': self.handler_type}]) + self.attr_stack.append([self.__TYPE, {"handler_type": self.handler_type}]) class MakeRendererError(UnsupportedExtensionError): @@ -222,8 +273,9 @@ class MakeRendererError(UnsupportedExtensionError): """ def __init__(self, *args, **kwargs): - super().__init__(settings.UNSUPPORTED_RENDER_MSG, *args, handler_type='renderer', - **kwargs) + super().__init__( + settings.UNSUPPORTED_RENDER_MSG, *args, handler_type="renderer", **kwargs + ) class MakeExporterError(UnsupportedExtensionError): @@ -232,5 +284,6 @@ class MakeExporterError(UnsupportedExtensionError): """ def __init__(self, *args, **kwargs): - super().__init__(settings.UNSUPPORTED_EXPORTER_MSG, *args, handler_type='exporter', - **kwargs) + super().__init__( + settings.UNSUPPORTED_EXPORTER_MSG, *args, handler_type="exporter", **kwargs + ) diff --git a/mfr/core/extension.py b/mfr/core/extension.py index cec133e7c..d5a8ff6a5 100644 --- a/mfr/core/extension.py +++ b/mfr/core/extension.py @@ -4,9 +4,7 @@ class BaseExporter(metaclass=abc.ABCMeta): - def __init__(self, ext, source_file_path, output_file_path, format, metadata): - """Initialize the base exporter. 
:param ext: the name of the extension to be exported @@ -20,56 +18,59 @@ def __init__(self, ext, source_file_path, output_file_path, format, metadata): self.output_file_path = output_file_path self.format = format self.metadata = metadata - self.exporter_metrics = MetricsRecord('exporter') + self.exporter_metrics = MetricsRecord("exporter") if self._get_module_name(): self.metrics = self.exporter_metrics.new_subrecord(self._get_module_name()) - self.exporter_metrics.merge({ - 'class': self._get_module_name(), - 'format': self.format, - 'source_path': str(self.source_file_path), - 'output_path': str(self.output_file_path), - # 'error': 'error_t', - # 'elapsed': 'elpased_t', - }) + self.exporter_metrics.merge( + { + "class": self._get_module_name(), + "format": self.format, + "source_path": str(self.source_file_path), + "output_path": str(self.output_file_path), + # 'error': 'error_t', + # 'elapsed': 'elpased_t', + } + ) @abc.abstractmethod def export(self): pass def _get_module_name(self): - return self.__module__ \ - .replace('mfr.extensions.', '', 1) \ - .replace('.export', '', 1) + return self.__module__.replace("mfr.extensions.", "", 1).replace( + ".export", "", 1 + ) class BaseRenderer(metaclass=abc.ABCMeta): - def __init__(self, metadata, file_path, url, assets_url, export_url): self.metadata = metadata self.file_path = file_path self.url = url - self.assets_url = f'{assets_url}/{self._get_module_name()}' + self.assets_url = f"{assets_url}/{self._get_module_name()}" self.export_url = export_url - self.renderer_metrics = MetricsRecord('renderer') + self.renderer_metrics = MetricsRecord("renderer") if self._get_module_name(): self.metrics = self.renderer_metrics.new_subrecord(self._get_module_name()) - self.renderer_metrics.merge({ - 'class': self._get_module_name(), - 'ext': self.metadata.ext, - 'url': self.url, - 'export_url': self.export_url, - 'file_path': self.file_path, - # 'error': 'error_t', - # 'elapsed': 'elpased_t', - }) + self.renderer_metrics.merge( 
+ { + "class": self._get_module_name(), + "ext": self.metadata.ext, + "url": self.url, + "export_url": self.export_url, + "file_path": self.file_path, + # 'error': 'error_t', + # 'elapsed': 'elpased_t', + } + ) # unoconv gets file_required and cache_result from its subrenderer, which is constructed # at the end of __init__ try: - self.renderer_metrics.add('file_required', self.file_required) - self.renderer_metrics.add('cache_result', self.cache_result) + self.renderer_metrics.add("file_required", self.file_required) + self.renderer_metrics.add("cache_result", self.cache_result) except AttributeError: pass @@ -90,6 +91,6 @@ def cache_result(self): pass def _get_module_name(self): - return self.__module__ \ - .replace('mfr.extensions.', '', 1) \ - .replace('.render', '', 1) + return self.__module__.replace("mfr.extensions.", "", 1).replace( + ".render", "", 1 + ) diff --git a/mfr/core/metrics.py b/mfr/core/metrics.py index 9dc29aefe..d84cdd97c 100644 --- a/mfr/core/metrics.py +++ b/mfr/core/metrics.py @@ -2,10 +2,7 @@ def _merge_dicts(a, b, path=None): - """"merges b into a - - Taken from: http://stackoverflow.com/a/7205107 - """ + """merges b into a Taken from: http://stackoverflow.com/a/7205107""" if path is None: path = [] for key in b: @@ -15,7 +12,7 @@ def _merge_dicts(a, b, path=None): elif a[key] == b[key]: pass # same leaf value else: - raise Exception('Conflict at %s' % '.'.join(path + [str(key)])) + raise Exception("Conflict at %s" % ".".join(path + [str(key)])) else: a[key] = b[key] @@ -84,7 +81,7 @@ def _set_dotted_key(store, key, value): ``self._metrics['foo']['bar'] = 'moo'``. This method is neither resilient nor intelligent and will react with bad grace if one of the keys already exists and is not a dict key. 
""" - parts = key.split('.') + parts = key.split(".") current = store for part in parts[:-1]: if part not in current: @@ -110,8 +107,7 @@ def key(self): return self.category def serialize(self): - """Returns its metrics with the metrics for each of the subrecords included under their key. - """ + """Returns its metrics with the metrics for each of the subrecords included under their key.""" metrics = super().serialize() for subrecord in self.subrecords: metrics[subrecord.key] = subrecord.serialize() @@ -139,7 +135,7 @@ def __init__(self, category, name): @property def key(self): """ID string for this subrecord: '{category}_{name}'""" - return f'{self.category}_{self.name}' + return f"{self.category}_{self.name}" def new_subrecord(self, name): """Creates and saves a new subrecord. The new subrecord will have its category set to the diff --git a/mfr/core/provider.py b/mfr/core/provider.py index fb7a2b722..123db49bc 100644 --- a/mfr/core/provider.py +++ b/mfr/core/provider.py @@ -1,11 +1,11 @@ import abc -import markupsafe import furl +import markupsafe from mfr.core import exceptions -from mfr.server import settings from mfr.core.metrics import MetricsRecord +from mfr.server import settings class BaseProvider(metaclass=abc.ABCMeta): @@ -22,17 +22,19 @@ def __init__(self, request, url, action=None): message="{} is not a permitted provider domain.".format( markupsafe.escape(url_netloc) ), - code=400 + code=400, ) self.url = url self.action = action - self.provider_metrics = MetricsRecord('provider') + self.provider_metrics = MetricsRecord("provider") self.metrics = self.provider_metrics.new_subrecord(self.NAME) - self.provider_metrics.merge({ - 'type': self.NAME, - 'url': str(self.url), - }) + self.provider_metrics.merge( + { + "type": self.NAME, + "url": str(self.url), + } + ) @abc.abstractmethod def NAME(self): @@ -49,8 +51,9 @@ def download(self): class ProviderMetadata: - - def __init__(self, name, ext, content_type, unique_key, download_url, stable_id=None): + def 
__init__( + self, name, ext, content_type, unique_key, download_url, stable_id=None + ): self.name = name self.ext = ext self.content_type = content_type @@ -60,10 +63,10 @@ def __init__(self, name, ext, content_type, unique_key, download_url, stable_id= def serialize(self): return { - 'name': self.name, - 'ext': self.ext, - 'content_type': self.content_type, - 'unique_key': str(self.unique_key), - 'download_url': str(self.download_url), - 'stable_id': None if self.stable_id is None else str(self.stable_id), + "name": self.name, + "ext": self.ext, + "content_type": self.content_type, + "unique_key": str(self.unique_key), + "download_url": str(self.download_url), + "stable_id": None if self.stable_id is None else str(self.stable_id), } diff --git a/mfr/core/remote_logging.py b/mfr/core/remote_logging.py index d7460c584..1037ef207 100644 --- a/mfr/core/remote_logging.py +++ b/mfr/core/remote_logging.py @@ -3,11 +3,10 @@ import logging import aiohttp +from waterbutler.core.utils import async_retry from mfr.server import settings from mfr.version import __version__ -from waterbutler.core.utils import async_retry - logger = logging.getLogger(__name__) @@ -18,119 +17,139 @@ async def log_analytics(request, metrics, is_error=False): return keen_payload = copy.deepcopy(metrics) - keen_payload['meta'] = { - 'mfr_version': __version__, - 'epoch': 1, + keen_payload["meta"] = { + "mfr_version": __version__, + "epoch": 1, } keen_payload.update(request) - keen_payload['keen'] = { - 'addons': [ + keen_payload["keen"] = { + "addons": [ { - 'name': 'keen:url_parser', - 'input': { - 'url': 'request.url' - }, - 'output': 'request.info', + "name": "keen:url_parser", + "input": {"url": "request.url"}, + "output": "request.info", }, { # private - 'name': 'keen:ip_to_geo', - 'input': { - 'ip': 'tech.ip', - 'remove_ip_property': True, + "name": "keen:ip_to_geo", + "input": { + "ip": "tech.ip", + "remove_ip_property": True, }, - 'output': 'geo', + "output": "geo", }, { # private - 'name': 
'keen:ua_parser', - 'input': { - 'ua_string': 'tech.ua', + "name": "keen:ua_parser", + "input": { + "ua_string": "tech.ua", }, - 'output': 'tech.info', + "output": "tech.info", }, ], } - if request['referrer']['url'] is not None: - keen_payload['keen']['addons'].append({ - 'name': 'keen:referrer_parser', - 'input': { - 'referrer_url': 'referrer.url', - 'page_url': 'request.url' - }, - 'output': 'referrer.info' - }) - keen_payload['keen']['addons'].append({ - 'name': 'keen:url_parser', - 'input': { - 'url': 'referrer.url' - }, - 'output': 'referrer.info', - }) + if request["referrer"]["url"] is not None: + keen_payload["keen"]["addons"].append( + { + "name": "keen:referrer_parser", + "input": {"referrer_url": "referrer.url", "page_url": "request.url"}, + "output": "referrer.info", + } + ) + keen_payload["keen"]["addons"].append( + { + "name": "keen:url_parser", + "input": {"url": "referrer.url"}, + "output": "referrer.info", + } + ) # massage file data, if available file_metadata = None try: - file_metadata = metrics['provider']['provider_osf']['metadata']['raw']['data'] + file_metadata = metrics["provider"]["provider_osf"]["metadata"]["raw"]["data"] except (KeyError, TypeError): pass else: _munge_file_metadata(file_metadata) # send the private payload - private_collection = 'mfr_errors' if is_error else 'mfr_action' - if (is_error and settings.KEEN_PRIVATE_LOG_ERRORS) or settings.KEEN_PRIVATE_LOG_VIEWS: - await _send_to_keen(keen_payload, private_collection, settings.KEEN_PRIVATE_PROJECT_ID, - settings.KEEN_PRIVATE_WRITE_KEY, keen_payload['handler']['type'], - domain='private') + private_collection = "mfr_errors" if is_error else "mfr_action" + if ( + is_error and settings.KEEN_PRIVATE_LOG_ERRORS + ) or settings.KEEN_PRIVATE_LOG_VIEWS: + await _send_to_keen( + keen_payload, + private_collection, + settings.KEEN_PRIVATE_PROJECT_ID, + settings.KEEN_PRIVATE_WRITE_KEY, + keen_payload["handler"]["type"], + domain="private", + ) if ( - keen_payload['handler']['type'] != 
'render' or file_metadata is None or is_error or not settings.KEEN_PUBLIC_LOG_VIEWS + keen_payload["handler"]["type"] != "render" + or file_metadata is None + or is_error + or not settings.KEEN_PUBLIC_LOG_VIEWS ): return # build and ship the public file stats payload - public_payload = _build_public_file_payload('view_file', request, file_metadata) - await _send_to_keen(public_payload, 'file_stats', settings.KEEN_PUBLIC_PROJECT_ID, - settings.KEEN_PUBLIC_WRITE_KEY, keen_payload['handler']['type'], - domain='public') + public_payload = _build_public_file_payload("view_file", request, file_metadata) + await _send_to_keen( + public_payload, + "file_stats", + settings.KEEN_PUBLIC_PROJECT_ID, + settings.KEEN_PUBLIC_WRITE_KEY, + keen_payload["handler"]["type"], + domain="public", + ) @async_retry(retries=5, backoff=5) -async def _send_to_keen(payload, collection, project_id, write_key, action, domain='private'): +async def _send_to_keen( + payload, collection, project_id, write_key, action, domain="private" +): """Serialize and send an event to Keen. If an error occurs, try up to five more times. 
Will raise an excpetion if the event cannot be sent.""" - serialized = json.dumps(payload).encode('UTF-8') + serialized = json.dumps(payload).encode("UTF-8") logger.debug(f"Serialized payload: {serialized}") headers = { - 'Content-Type': 'application/json', - 'Authorization': write_key, + "Content-Type": "application/json", + "Authorization": write_key, } - url = '{}/{}/projects/{}/events/{}'.format(settings.KEEN_API_BASE_URL, - settings.KEEN_API_VERSION, - project_id, collection) + url = "{}/{}/projects/{}/events/{}".format( + settings.KEEN_API_BASE_URL, settings.KEEN_API_VERSION, project_id, collection + ) - async with aiohttp.request('POST', url, headers=headers, data=serialized) as resp: + async with aiohttp.request("POST", url, headers=headers, data=serialized) as resp: if resp.status == 201: - logger.info(f'Successfully logged {action} to {collection} collection in {domain} Keen') + logger.info( + f"Successfully logged {action} to {collection} collection in {domain} Keen" + ) else: - raise Exception('Failed to log {} to {} collection in {} Keen. Status: {} Error: {}'.format( - action, collection, domain, str(int(resp.status)), await resp.read() - )) + raise Exception( + "Failed to log {} to {} collection in {} Keen. 
Status: {} Error: {}".format( + action, collection, domain, str(int(resp.status)), await resp.read() + ) + ) return def _scrub_headers_for_keen(payload, MAX_ITERATIONS=10): - """ Scrub unwanted characters like \\.\\ from the keys in the keen payload """ + """Scrub unwanted characters like \\.\\ from the keys in the keen payload""" scrubbed_payload = {} for key in sorted(payload): - scrubbed_key = key.replace('.', '-') + scrubbed_key = key.replace(".", "-") # if our new scrubbed key is already in the payload, we need to increment it if scrubbed_key in scrubbed_payload: - for i in range(1, MAX_ITERATIONS + 1): # try MAX_ITERATION times, then give up & drop it - incremented_key = f'{scrubbed_key}-{i}' + for i in range( + 1, MAX_ITERATIONS + 1 + ): # try MAX_ITERATION times, then give up & drop it + incremented_key = f"{scrubbed_key}-{i}" if incremented_key not in scrubbed_payload: # we found an unused key! scrubbed_payload[incremented_key] = payload[key] break @@ -146,85 +165,84 @@ def _serialize_request(request): return {} headers_dict = {} - for (k, v) in sorted(request.headers.get_all()): - if k not in ('Authorization', 'Cookie', 'User-Agent',): + for k, v in sorted(request.headers.get_all()): + if k not in ( + "Authorization", + "Cookie", + "User-Agent", + ): headers_dict[k] = v headers_dict = _scrub_headers_for_keen(headers_dict) serialized = { - 'tech': { - 'ip': request.remote_ip, - 'ua': request.headers['User-Agent'], + "tech": { + "ip": request.remote_ip, + "ua": request.headers["User-Agent"], }, - 'request': { - 'method': request.method, - 'url': request.full_url(), - 'time': request.request_time(), - 'headers': headers_dict, + "request": { + "method": request.method, + "url": request.full_url(), + "time": request.request_time(), + "headers": headers_dict, }, - 'referrer': { - 'url': None, + "referrer": { + "url": None, }, } - if 'Referer' in request.headers: - referrer = request.headers['Referer'] - serialized['referrer']['url'] = referrer + if "Referer" in 
request.headers: + referrer = request.headers["Referer"] + serialized["referrer"]["url"] = referrer return serialized def _build_public_file_payload(action, request, file_metadata): public_payload = { - 'meta': { - 'epoch': 1, + "meta": { + "epoch": 1, }, - 'request': { - 'url': request['request']['url'] + "request": {"url": request["request"]["url"]}, + "anon": { # intended for anonymized geolocation, never implemented + "country": None, + "continent": None, }, - 'anon': { # intended for anonymized geolocation, never implemented - 'country': None, - 'continent': None, + "action": { + "type": action, }, - 'action': { - 'type': action, - }, - 'file': file_metadata, - 'keen': { - 'addons': [ + "file": file_metadata, + "keen": { + "addons": [ { - 'name': 'keen:url_parser', - 'input': { - 'url': 'request.url' - }, - 'output': 'request.info', + "name": "keen:url_parser", + "input": {"url": "request.url"}, + "output": "request.info", }, ], }, } try: - public_payload['node'] = {'id': file_metadata['resource']} + public_payload["node"] = {"id": file_metadata["resource"]} except KeyError: pass - if request['referrer']['url'] is not None: - public_payload['referrer'] = request['referrer'] # .info added via keen addons - public_payload['keen']['addons'].append({ - 'name': 'keen:referrer_parser', - 'input': { - 'referrer_url': 'referrer.url', - 'page_url': 'request.url' - }, - 'output': 'referrer.info' - }) - public_payload['keen']['addons'].append({ - 'name': 'keen:url_parser', - 'input': { - 'url': 'referrer.url' - }, - 'output': 'referrer.info', - }) + if request["referrer"]["url"] is not None: + public_payload["referrer"] = request["referrer"] # .info added via keen addons + public_payload["keen"]["addons"].append( + { + "name": "keen:referrer_parser", + "input": {"referrer_url": "referrer.url", "page_url": "request.url"}, + "output": "referrer.info", + } + ) + public_payload["keen"]["addons"].append( + { + "name": "keen:url_parser", + "input": {"url": "referrer.url"}, + 
"output": "referrer.info", + } + ) return public_payload @@ -234,21 +252,26 @@ def _munge_file_metadata(metadata): return None try: - file_extra = metadata.pop('extra') + file_extra = metadata.pop("extra") except KeyError: pass else: - metadata['extra'] = { - 'common': {}, - metadata['provider']: file_extra, + metadata["extra"] = { + "common": {}, + metadata["provider"]: file_extra, } # synthetic fields to make Keen queries easier/prettier - metadata['full_path'] = '/'.join([ - '', metadata['resource'], metadata['provider'], metadata['path'].lstrip('/') - ]) - metadata['full_materialized'] = '/'.join([ - '', metadata['resource'], metadata['provider'], metadata['materialized'].lstrip('/') - ]) + metadata["full_path"] = "/".join( + ["", metadata["resource"], metadata["provider"], metadata["path"].lstrip("/")] + ) + metadata["full_materialized"] = "/".join( + [ + "", + metadata["resource"], + metadata["provider"], + metadata["materialized"].lstrip("/"), + ] + ) return metadata diff --git a/mfr/core/utils.py b/mfr/core/utils.py index 57c5ad68b..6b28388c6 100644 --- a/mfr/core/utils.py +++ b/mfr/core/utils.py @@ -1,4 +1,5 @@ from importlib.metadata import entry_points + from stevedore import driver from mfr.core import exceptions @@ -16,38 +17,43 @@ def make_provider(name, request, url, action=None): """ try: return driver.DriverManager( - namespace='mfr.providers', + namespace="mfr.providers", name=name.lower(), invoke_on_load=True, - invoke_args=(request, url, ), - invoke_kwds={'action': action}, + invoke_args=( + request, + url, + ), + invoke_kwds={"action": action}, ).driver except RuntimeError: raise exceptions.MakeProviderError( f'"{name.lower()}" is not a supported provider', - namespace='mfr.providers', + namespace="mfr.providers", name=name.lower(), invoke_on_load=True, invoke_args={ - 'request': request, - 'url': url, - } + "request": request, + "url": url, + }, ) + def fix_name(name: str): - name = name.removeprefix('.').replace('+', 'p') - if name == 
'lasso[89]': - return 'lasso' - elif name == 'php[345]': - return 'php' - elif name == 'css.in': - return 'css' - elif name == 'js.in': - return 'js' - elif name == 'xul.in': - return 'xul' + name = name.removeprefix(".").replace("+", "p") + if name == "lasso[89]": + return "lasso" + elif name == "php[345]": + return "php" + elif name == "css.in": + return "css" + elif name == "js.in": + return "js" + elif name == "xul.in": + return "xul" return name + def make_exporter(name, source_file_path, output_file_path, file_format, metadata): """Returns an instance of :class:`mfr.core.extension.BaseExporter` @@ -59,25 +65,31 @@ def make_exporter(name, source_file_path, output_file_path, file_format, metadat :rtype: :class:`mfr.core.extension.BaseExporter` """ - normalized_name = fix_name(name and name.lower()) or 'none' + normalized_name = fix_name(name and name.lower()) or "none" try: return driver.DriverManager( - namespace='mfr.exporters', + namespace="mfr.exporters", name=normalized_name, invoke_on_load=True, - invoke_args=(normalized_name, source_file_path, output_file_path, file_format, metadata), + invoke_args=( + normalized_name, + source_file_path, + output_file_path, + file_format, + metadata, + ), ).driver except RuntimeError: raise exceptions.MakeExporterError( - namespace='mfr.exporters', + namespace="mfr.exporters", name=normalized_name, invoke_on_load=True, invoke_args={ - 'source_file_path': source_file_path, - 'output_file_path': output_file_path, - 'format': file_format, - } + "source_file_path": source_file_path, + "output_file_path": output_file_path, + "format": file_format, + }, ) @@ -94,31 +106,31 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): :rtype: :class:`mfr.core.extension.BaseRenderer` """ - normalized_name = fix_name(name and name.lower()) or 'none' + normalized_name = fix_name(name and name.lower()) or "none" try: return driver.DriverManager( - namespace='mfr.renderers', + namespace="mfr.renderers", 
name=normalized_name, invoke_on_load=True, invoke_args=(metadata, file_path, url, assets_url, export_url), ).driver except RuntimeError: raise exceptions.MakeRendererError( - namespace='mfr.renderers', + namespace="mfr.renderers", name=normalized_name, invoke_on_load=True, invoke_args={ - 'metadata': metadata.serialize(), - 'file_path': file_path, - 'url': url, - 'assets_url': assets_url, - 'export_url': export_url, - } + "metadata": metadata.serialize(), + "file_path": file_path, + "url": url, + "assets_url": assets_url, + "export_url": export_url, + }, ) def get_renderer_name(name: str) -> str: - """ Return the name of the renderer used for a certain file extension. + """Return the name of the renderer used for a certain file extension. :param str name: The name of the extension to get the renderer name for. (.jpg, .docx, etc) @@ -127,12 +139,12 @@ def get_renderer_name(name: str) -> str: # `ep_iterator` is an iterable object. Must convert it to a `list` for access. # `list()` can only be called once because the iterator moves to the end after conversion. - ep = entry_points().select(group='mfr.renderers', name=name.lower()) + ep = entry_points().select(group="mfr.renderers", name=name.lower()) ep_list = list(ep) # Empty list indicates unsupported file type. Return '' and let `make_renderer()` handle it. if len(ep_list) == 0: - return '' + return "" # If the file type is supported, there must be only one element in the list. assert len(ep_list) == 1 @@ -140,7 +152,7 @@ def get_renderer_name(name: str) -> str: def get_exporter_name(name: str) -> str: - """ Return the name of the exporter used for a certain file extension. + """Return the name of the exporter used for a certain file extension. :param str name: The name of the extension to get the exporter name for. (.jpg, .docx, etc) @@ -149,24 +161,24 @@ def get_exporter_name(name: str) -> str: # `ep_iterator` is an iterable object. Must convert it to a `list` for access. 
# `list()` can only be called once because the iterator moves to the end after conversion. - ep = entry_points().select(group='mfr.exporters', name=name.lower()) + ep = entry_points().select(group="mfr.exporters", name=name.lower()) ep_list = list(ep) # Empty list indicates unsupported export type. Return '' and let `make_exporter()` handle it. if len(ep_list) == 0: - return '' + return "" # If the export type is supported, there must be only one element in the list. assert len(ep_list) == 1 return ep_list[0].value.split(":")[-1] -def sizeof_fmt(num, suffix='B'): +def sizeof_fmt(num, suffix="B"): if abs(num) < 1000: - return '{:3.0f}{}'.format(num, suffix) + return "{:3.0f}{}".format(num, suffix) - for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(num) < 1000.0: - return '{:3.1f}{}{}'.format(num, unit, suffix) + return "{:3.1f}{}{}".format(num, unit, suffix) num /= 1000.0 - return '{:.1f}{}{}'.format(num, 'Y', suffix) + return "{:.1f}{}{}".format(num, "Y", suffix) diff --git a/mfr/extensions/audio/render.py b/mfr/extensions/audio/render.py index 00b8be3fd..ce6c19b5d 100644 --- a/mfr/extensions/audio/render.py +++ b/mfr/extensions/audio/render.py @@ -7,11 +7,9 @@ class AudioRenderer(extension.BaseRenderer): - TEMPLATE = TemplateLookup( - directories=[ - os.path.join(os.path.dirname(__file__), 'templates') - ]).get_template('viewer.mako') + directories=[os.path.join(os.path.dirname(__file__), "templates")] + ).get_template("viewer.mako") def render(self): safe_url = escape_url_for_template(self.url) diff --git a/mfr/extensions/codepygments/exceptions.py b/mfr/extensions/codepygments/exceptions.py index 8b7581faf..383fd052f 100644 --- a/mfr/extensions/codepygments/exceptions.py +++ b/mfr/extensions/codepygments/exceptions.py @@ -1,9 +1,9 @@ from mfr.core.exceptions import RendererError -class CodePygmentsRendererError(RendererError): +class CodePygmentsRendererError(RendererError): def __init__(self, 
message, *args, **kwargs): - super().__init__(message, *args, renderer_class='codepygments', **kwargs) + super().__init__(message, *args, renderer_class="codepygments", **kwargs) class FileTooLargeError(CodePygmentsRendererError): @@ -11,19 +11,25 @@ class FileTooLargeError(CodePygmentsRendererError): relating to limit on size of file to display should inherit from FileTooLargeError """ - __TYPE = 'codepygments_file_too_large' + __TYPE = "codepygments_file_too_large" - def __init__(self, message, *args, code: int = 400, file_size: int = None, max_size: int = None, - **kwargs): + def __init__( + self, + message, + *args, + code: int = 400, + file_size: int = None, + max_size: int = None, + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.file_size = file_size self.max_size = max_size - self.attr_stack.append([self.__TYPE, { - 'file_size': self.file_size, - 'max_size': self.max_size - }]) + self.attr_stack.append( + [self.__TYPE, {"file_size": self.file_size, "max_size": self.max_size}] + ) class FileDecodingError(CodePygmentsRendererError): @@ -31,16 +37,28 @@ class FileDecodingError(CodePygmentsRendererError): relating to decoding of file to display should inherit from FileDecodingError """ - __TYPE = 'codepygments_file_decoding' + __TYPE = "codepygments_file_decoding" - def __init__(self, message, *args, code: int = 400, original_exception: Exception = None, - category: str = '', **kwargs): + def __init__( + self, + message, + *args, + code: int = 400, + original_exception: Exception = None, + category: str = "", + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.category = category self.original_exception = self._format_original_exception(original_exception) - self.attr_stack.append([self.__TYPE, { - 'original_exception': self.original_exception, - 'category': self.category, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "original_exception": self.original_exception, + "category": self.category, + }, + ] 
+ ) diff --git a/mfr/extensions/codepygments/render.py b/mfr/extensions/codepygments/render.py index d28739610..c66a6e05a 100644 --- a/mfr/extensions/codepygments/render.py +++ b/mfr/extensions/codepygments/render.py @@ -1,47 +1,44 @@ -import os import logging +import os import chardet -from humanfriendly import format_size import pygments +import pygments.formatters import pygments.lexers import pygments.lexers.special -import pygments.formatters -from pygments.util import ClassNotFound +from humanfriendly import format_size from mako.lookup import TemplateLookup +from pygments.util import ClassNotFound from mfr.core import extension -from mfr.extensions.codepygments import settings -from mfr.extensions.codepygments import exceptions +from mfr.extensions.codepygments import exceptions, settings logger = logging.getLogger(__name__) class CodePygmentsRenderer(extension.BaseRenderer): - DEFAULT_LEXER = pygments.lexers.special.TextLexer TEMPLATE = TemplateLookup( - directories=[ - os.path.join(os.path.dirname(__file__), 'templates') - ]).get_template('viewer.mako') + directories=[os.path.join(os.path.dirname(__file__), "templates")] + ).get_template("viewer.mako") def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.metrics.add('pygments_version', pygments.__version__) + self.metrics.add("pygments_version", pygments.__version__) def render(self): file_size = os.path.getsize(self.file_path) if file_size > settings.MAX_SIZE: raise exceptions.FileTooLargeError( - 'Text files larger than {} are not rendered. Please download ' - 'the file to view.'.format(format_size(settings.MAX_SIZE, binary=True)), + "Text files larger than {} are not rendered. 
Please download " + "the file to view.".format(format_size(settings.MAX_SIZE, binary=True)), file_size=file_size, max_size=settings.MAX_SIZE, extension=self.metadata.ext, ) - with open(self.file_path, 'rb') as fp: + with open(self.file_path, "rb") as fp: body = self._render_html(fp, self.metadata.ext) return self.TEMPLATE.render(base=self.assets_url, body=body) @@ -62,17 +59,17 @@ def _render_html(self, fp, ext, *args, **kwargs): formatter = pygments.formatters.HtmlFormatter() data = fp.read() - content, encoding = None, 'utf-8' + content, encoding = None, "utf-8" try: content = data.decode(encoding) except UnicodeDecodeError: detected_encoding = chardet.detect(data) - encoding = detected_encoding.get('encoding', None) + encoding = detected_encoding.get("encoding", None) if encoding is None: raise exceptions.FileDecodingError( - message='Unable to detect encoding of source file.', + message="Unable to detect encoding of source file.", extension=ext, - category='undetectable_encoding', + category="undetectable_encoding", code=400, ) @@ -80,9 +77,9 @@ def _render_html(self, fp, ext, *args, **kwargs): content = data.decode(encoding) except UnicodeDecodeError as err: raise exceptions.FileDecodingError( - message=f'Unable to decode file as {encoding}.', + message=f"Unable to decode file as {encoding}.", extension=ext, - category='undecodable', + category="undecodable", original_exception=err, code=400, ) @@ -91,24 +88,24 @@ def _render_html(self, fp, ext, *args, **kwargs): raise exceptions.FileDecodingError( message=f'File decoded to undefined using encoding "{encoding}"', extension=ext, - category='decoded_to_undefined', + category="decoded_to_undefined", code=500, ) - self.metrics.merge({'encoding': encoding, 'default_lexer': False}) + self.metrics.merge({"encoding": encoding, "default_lexer": False}) try: # check if there is a lexer available for more obscure file types if ext in settings.lexer_lib.keys(): lexer = 
pygments.lexers.get_lexer_by_name(settings.lexer_lib[ext]) - logger.debug('found pygments lexer by name') + logger.debug("found pygments lexer by name") else: lexer = pygments.lexers.guess_lexer_for_filename(ext, content) - logger.debug('found pygments lexer by guessing') + logger.debug("found pygments lexer by guessing") except ClassNotFound: - logger.debug('pygments lexer class not found! using default') - self.metrics.add('default_lexer', True) + logger.debug("pygments lexer class not found! using default") + self.metrics.add("default_lexer", True) lexer = self.DEFAULT_LEXER() - self.metrics.add('lexer', lexer.name) + self.metrics.add("lexer", lexer.name) return pygments.highlight(content, lexer, formatter) diff --git a/mfr/extensions/codepygments/settings.py b/mfr/extensions/codepygments/settings.py index 7659fa4db..081fcf658 100644 --- a/mfr/extensions/codepygments/settings.py +++ b/mfr/extensions/codepygments/settings.py @@ -1,20 +1,20 @@ from mfr import settings -config = settings.child('CODEPYGMENTS_EXTENSION_CONFIG') +config = settings.child("CODEPYGMENTS_EXTENSION_CONFIG") -MAX_SIZE = int(config.get('MAX_SIZE', 204800)) # 200kb +MAX_SIZE = int(config.get("MAX_SIZE", 204800)) # 200kb lexer_lib = { - '.gdt': 'xml', - '.pzfx': 'xml', - '.cmdi': 'xml', - '.rm5': 'xml', - '.eaf': 'xml', - '.qsf': 'json', - '.psyexp': 'html', - '.umbrella': 'json', - '.jst': 'js', - '.ijm': 'java', - '.csl': 'xml', - '.lss': 'xml' + ".gdt": "xml", + ".pzfx": "xml", + ".cmdi": "xml", + ".rm5": "xml", + ".eaf": "xml", + ".qsf": "json", + ".psyexp": "html", + ".umbrella": "json", + ".jst": "js", + ".ijm": "java", + ".csl": "xml", + ".lss": "xml", } diff --git a/mfr/extensions/image/exceptions.py b/mfr/extensions/image/exceptions.py index 642d217bc..637cbf9be 100644 --- a/mfr/extensions/image/exceptions.py +++ b/mfr/extensions/image/exceptions.py @@ -6,18 +6,30 @@ class PillowImageError(ExporterError): and relating to the Pillow Library should inherit from PillowImageError """ - 
__TYPE = 'image_pillow' + __TYPE = "image_pillow" - def __init__(self, message, *args, export_format: str = '', detected_format: str = '', - original_exception: Exception = None, **kwargs): - super().__init__(message, *args, exporter_class='image', **kwargs) + def __init__( + self, + message, + *args, + export_format: str = "", + detected_format: str = "", + original_exception: Exception = None, + **kwargs, + ): + super().__init__(message, *args, exporter_class="image", **kwargs) self.export_format = export_format self.detected_format = detected_format self.original_exception = self._format_original_exception(original_exception) - self.attr_stack.append([self.__TYPE, { - 'export_format': self.export_format, - 'detected_format': self.detected_format, - 'original_exception': self.original_exception, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "export_format": self.export_format, + "detected_format": self.detected_format, + "original_exception": self.original_exception, + }, + ] + ) diff --git a/mfr/extensions/image/export.py b/mfr/extensions/image/export.py index 23a22409a..0f3599107 100644 --- a/mfr/extensions/image/export.py +++ b/mfr/extensions/image/export.py @@ -10,24 +10,25 @@ class ImageExporter(extension.BaseExporter): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.metrics.add('pil_version', Image.__version__) + self.metrics.add("pil_version", Image.__version__) def export(self): - parts = self.format.split('.') + parts = self.format.split(".") image_type = parts[-1].lower() - max_size = {'w': None, 'h': None} + max_size = {"w": None, "h": None} if len(parts) == 2: - max_size['w'], max_size['h'] = (int(size) for size in parts[0].split('x')) - self.metrics.merge({ - 'type': image_type, - 'max_size_w': max_size['w'], - 'max_size_h': max_size['h'], - }) + max_size["w"], max_size["h"] = (int(size) for size in parts[0].split("x")) + self.metrics.merge( + { + "type": image_type, + "max_size_w": max_size["w"], + 
"max_size_h": max_size["h"], + } + ) try: - if self.ext in ['.psd']: + if self.ext in [".psd"]: # silence warnings from psd-tools # Warnings warn of outdated depedency that is a pain to install # and about colors being possibly wrong @@ -38,27 +39,34 @@ def export(self): image = Image.open(self.source_file_path) # Only resize when both dimensions are available - if max_size.get('w') and max_size.get('h'): + if max_size.get("w") and max_size.get("h"): # resize the image to the w/h maximum specified - ratio = min(max_size['w'] / image.size[0], max_size['h'] / image.size[1]) - self.metrics.add('ratio', ratio) + ratio = min( + max_size["w"] / image.size[0], max_size["h"] / image.size[1] + ) + self.metrics.add("ratio", ratio) if ratio < 1: - size_tuple = (round(image.size[0] * ratio), round(image.size[1] * ratio)) + size_tuple = ( + round(image.size[0] * ratio), + round(image.size[1] * ratio), + ) image = image.resize(size_tuple, Image.Resampling.LANCZOS) # Mode 'P' is for paletted images. They must be converted to RGB before exporting to # jpeg, otherwise Pillow will throw an error. This is a temporary workaround, as the # conversion is imprecise and can be ugly. # See: https://stackoverflow.com/q/21669657 - if image.mode == 'P': - image = image.convert('RGB') + if image.mode == "P": + image = image.convert("RGB") # handle transparency # from https://github.com/python-pillow/Pillow/issues/2609 - if image.mode in ('RGBA', 'RGBa', 'LA') and image_type in ['jpeg', 'jpg']: + if image.mode in ("RGBA", "RGBa", "LA") and image_type in ["jpeg", "jpg"]: # JPEG has no transparency, so anything that was transparent gets changed to # EXPORT_BACKGROUND_COLOR. Default is white. 
- background = Image.new(image.mode[:-1], image.size, EXPORT_BACKGROUND_COLOR) + background = Image.new( + image.mode[:-1], image.size, EXPORT_BACKGROUND_COLOR + ) background.paste(image, image.split()[-1]) image = background @@ -68,8 +76,8 @@ def export(self): except (UnicodeDecodeError, OSError, FileNotFoundError) as err: os.path.splitext(os.path.split(self.source_file_path)[-1]) raise exceptions.PillowImageError( - 'Unable to export the file as a {}, please check that the ' - 'file is a valid image.'.format(image_type), + "Unable to export the file as a {}, please check that the " + "file is a valid image.".format(image_type), export_format=image_type, detected_format=self.detect_image_format(), original_exception=err, diff --git a/mfr/extensions/image/render.py b/mfr/extensions/image/render.py index dd8469433..452c0261b 100644 --- a/mfr/extensions/image/render.py +++ b/mfr/extensions/image/render.py @@ -5,18 +5,16 @@ from mfr.core import extension from mfr.extensions.image import settings -from mfr.extensions.utils import munge_url_for_localdev, escape_url_for_template +from mfr.extensions.utils import escape_url_for_template, munge_url_for_localdev class ImageRenderer(extension.BaseRenderer): - TEMPLATE = TemplateLookup( - directories=[ - os.path.join(os.path.dirname(__file__), 'templates') - ]).get_template('viewer.mako') + directories=[os.path.join(os.path.dirname(__file__), "templates")] + ).get_template("viewer.mako") def render(self): - self.metrics.add('needs_export', False) + self.metrics.add("needs_export", False) if self.metadata.ext in settings.EXPORT_EXCLUSIONS: download_url = munge_url_for_localdev(self.url) safe_url = escape_url_for_template(download_url.geturl()) @@ -24,15 +22,17 @@ def render(self): exported_url = furl.furl(self.export_url) if settings.EXPORT_MAXIMUM_SIZE and settings.EXPORT_TYPE: - exported_url.args['format'] = f'{settings.EXPORT_MAXIMUM_SIZE}.{settings.EXPORT_TYPE}' + exported_url.args["format"] = ( + 
f"{settings.EXPORT_MAXIMUM_SIZE}.{settings.EXPORT_TYPE}" + ) elif settings.EXPORT_TYPE: - exported_url.args['format'] = settings.EXPORT_TYPE + exported_url.args["format"] = settings.EXPORT_TYPE else: download_url = munge_url_for_localdev(self.url) safe_url = escape_url_for_template(download_url.geturl()) return self.TEMPLATE.render(base=self.assets_url, url=safe_url) - self.metrics.add('needs_export', True) + self.metrics.add("needs_export", True) safe_url = escape_url_for_template(exported_url.url) return self.TEMPLATE.render(base=self.assets_url, url=safe_url) diff --git a/mfr/extensions/image/settings.py b/mfr/extensions/image/settings.py index 148213805..97073d887 100644 --- a/mfr/extensions/image/settings.py +++ b/mfr/extensions/image/settings.py @@ -1,8 +1,8 @@ from mfr import settings -config = settings.child('IMAGE_EXTENSION_CONFIG') +config = settings.child("IMAGE_EXTENSION_CONFIG") -EXPORT_TYPE = config.get('EXPORT_TYPE', 'jpeg') -EXPORT_MAXIMUM_SIZE = config.get('EXPORT_MAXIMUM_SIZE', '2400x2400') -EXPORT_EXCLUSIONS = config.get('EXPORT_EXCLUSIONS', '.gif .ico').split(' ') -EXPORT_BACKGROUND_COLOR = config.get('EXPORT_BACKGROUND_COLOR', 'white') +EXPORT_TYPE = config.get("EXPORT_TYPE", "jpeg") +EXPORT_MAXIMUM_SIZE = config.get("EXPORT_MAXIMUM_SIZE", "2400x2400") +EXPORT_EXCLUSIONS = config.get("EXPORT_EXCLUSIONS", ".gif .ico").split(" ") +EXPORT_BACKGROUND_COLOR = config.get("EXPORT_BACKGROUND_COLOR", "white") diff --git a/mfr/extensions/ipynb/exceptions.py b/mfr/extensions/ipynb/exceptions.py index 1f3759032..a6194275f 100644 --- a/mfr/extensions/ipynb/exceptions.py +++ b/mfr/extensions/ipynb/exceptions.py @@ -2,17 +2,28 @@ class InvalidFormatError(RendererError): + __TYPE = "ipynb_invalid_format" - __TYPE = 'ipynb_invalid_format' + def __init__( + self, + message, + *args, + code: int = 400, + download_url: str = "", + original_exception: Exception = None, + **kwargs, + ): + super().__init__(message, *args, code=code, renderer_class="ipynb", **kwargs) 
- def __init__(self, message, *args, code: int = 400, download_url: str = '', - original_exception: Exception = None, **kwargs): - super().__init__(message, *args, code=code, renderer_class='ipynb', **kwargs) - - self.download_url = download_url, + self.download_url = (download_url,) self.original_exception = self._format_original_exception(original_exception) - self.attr_stack.append([self.__TYPE, { - 'download_url': self.download_url, - 'original_exception': self.original_exception, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "download_url": self.download_url, + "original_exception": self.original_exception, + }, + ] + ) diff --git a/mfr/extensions/ipynb/render.py b/mfr/extensions/ipynb/render.py index 3e046c269..d158bbb25 100644 --- a/mfr/extensions/ipynb/render.py +++ b/mfr/extensions/ipynb/render.py @@ -11,16 +11,14 @@ class IpynbRenderer(extension.BaseRenderer): - TEMPLATE = TemplateLookup( - directories=[ - os.path.join(os.path.dirname(__file__), 'templates') - ]).get_template('viewer.mako') + directories=[os.path.join(os.path.dirname(__file__), "templates")] + ).get_template("viewer.mako") def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.metrics.add('nbformat_version', nbformat.__version__) - self.metrics.add('nbconvert_version', nbconvert.__version__) + self.metrics.add("nbformat_version", nbformat.__version__) + self.metrics.add("nbconvert_version", nbconvert.__version__) def render(self): try: @@ -28,20 +26,24 @@ def render(self): notebook = nbformat.reads(file_pointer.read(), as_version=4) except ValueError as err: raise exceptions.InvalidFormatError( - f'Could not read ipython notebook file. {str(err)}', + f"Could not read ipython notebook file. 
{str(err)}", extension=self.metadata.ext, download_url=str(self.metadata.download_url), original_exception=err, ) - exporter = HTMLExporter(config=Config({ - 'HTMLExporter': { - 'template_name': 'basic', - }, - 'CSSHtmlHeaderTransformer': { - 'enabled': False, - }, - })) + exporter = HTMLExporter( + config=Config( + { + "HTMLExporter": { + "template_name": "basic", + }, + "CSSHtmlHeaderTransformer": { + "enabled": False, + }, + } + ) + ) (body, _) = exporter.from_notebook_node(notebook) return self.TEMPLATE.render(base=self.assets_url, body=body) diff --git a/mfr/extensions/jamovi/exceptions.py b/mfr/extensions/jamovi/exceptions.py index 9a4ce4d4e..9da572442 100644 --- a/mfr/extensions/jamovi/exceptions.py +++ b/mfr/extensions/jamovi/exceptions.py @@ -2,9 +2,8 @@ class JamoviRendererError(RendererError): - def __init__(self, message, *args, **kwargs): - super().__init__(message, *args, renderer_class='jamovi', **kwargs) + super().__init__(message, *args, renderer_class="jamovi", **kwargs) class JamoviVersionError(JamoviRendererError): @@ -12,19 +11,32 @@ class JamoviVersionError(JamoviRendererError): data archive version should throw or subclass JamoviVersionError. 
""" - __TYPE = 'jamovi_version' + __TYPE = "jamovi_version" - def __init__(self, message, *args, code: int = 400, created_by: str = '', - actual_version: str = '', required_version: str = '', **kwargs): + def __init__( + self, + message, + *args, + code: int = 400, + created_by: str = "", + actual_version: str = "", + required_version: str = "", + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.created_by = created_by self.actual_version = actual_version self.required_version = required_version - self.attr_stack.append([self.__TYPE, { - 'created_by': self.created_by, - 'actual_version': self.actual_version, - 'required_version': self.required_version, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "created_by": self.created_by, + "actual_version": self.actual_version, + "required_version": self.required_version, + }, + ] + ) class JamoviFileCorruptError(JamoviRendererError): @@ -32,14 +44,26 @@ class JamoviFileCorruptError(JamoviRendererError): while consuming jamovi files should inherit from JamoviFileCorruptError """ - __TYPE = 'jamovi_file_corrupt' + __TYPE = "jamovi_file_corrupt" - def __init__(self, message, *args, code: int = 400, corruption_type: str = '', - reason: str = '', **kwargs): + def __init__( + self, + message, + *args, + code: int = 400, + corruption_type: str = "", + reason: str = "", + **kwargs, + ): super().__init__(message, *args, code=code, **kwargs) self.corruption_type = corruption_type self.reason = reason - self.attr_stack.append([self.__TYPE, { - 'corruption_type': self.corruption_type, - 'reason': self.reason, - }]) + self.attr_stack.append( + [ + self.__TYPE, + { + "corruption_type": self.corruption_type, + "reason": self.reason, + }, + ] + ) diff --git a/mfr/extensions/jamovi/html_processor.py b/mfr/extensions/jamovi/html_processor.py index bcce13de8..e865c18f1 100644 --- a/mfr/extensions/jamovi/html_processor.py +++ b/mfr/extensions/jamovi/html_processor.py @@ -5,13 +5,12 @@ class 
HTMLProcessor(HTMLParser): - # The HTMLProcessor replaces the src attribute in tags with the base64 equivalent. # The image content comes from the zip_file (specified with set_src_source()). # It also strips