def _parse_request_json():
    """Return the request body as a JSON object, un-gzipping it when needed.

    When the ``Content-Encoding`` header is ``gzip`` the raw body is
    decompressed and decoded as UTF-8 JSON; otherwise Flask's own
    ``request.get_json()`` is used.

    Aborts with 400 when the gzip stream or the JSON inside it is invalid, or
    when the decoded payload is not a JSON object (the report builders call
    ``.get`` on it).
    """
    # Local import: only needed to name the low-level inflate error below.
    import zlib

    content_encoding = request.headers.get('Content-Encoding', '').strip().lower()
    if content_encoding == 'gzip':
        try:
            # NOTE(review): the decompressed size is unbounded; consider a cap
            # if this endpoint is reachable by untrusted clients.
            decompressed_data = gzip.decompress(request.get_data())
            payload = json.loads(decompressed_data.decode('utf-8'))
        except (OSError, EOFError, zlib.error, ValueError) as e:
            # OSError covers gzip.BadGzipFile (bad magic / CRC); EOFError is a
            # truncated stream; zlib.error is corrupt deflate data; ValueError
            # covers json.JSONDecodeError and UnicodeDecodeError.
            abort(HTTPStatus.BAD_REQUEST, f'Failed to decompress or parse GZIP data: {str(e)}')
    else:
        payload = request.get_json()

    if not isinstance(payload, dict):
        abort(HTTPStatus.BAD_REQUEST, 'Request body must be a JSON object')
    return payload


def _generate_csv_report(request_json):
    """Build the CSV report stream described by ``request_json``.

    Returns a ``(report, file_name)`` tuple. ``report`` is ``None`` when
    ``templateVars`` has no (or empty) ``columns``; otherwise it is whatever
    ``CsvService.create_report`` returns for ``templateVars``.
    """
    report_name = request_json.get('reportName', 'report')
    file_name = f'{report_name}.csv'
    # ``or {}`` also covers an explicit ``"templateVars": null``.
    template_vars = request_json.get('templateVars') or {}
    if not template_vars.get('columns'):
        return None, file_name
    return CsvService.create_report(template_vars), file_name


def _generate_pdf_report(request_json):
    """Build the PDF report described by ``request_json``.

    ``templateName`` (a stored template) takes precedence over ``template``
    (an inline template). Returns a ``(report, file_name)`` tuple; ``report``
    is ``None`` when neither key is present.

    Aborts with 400 when ``templateVars`` is missing or the stored-template
    call raises ``ValueError``, and with 404 when the stored template does not
    exist.
    """
    report_name = request_json.get('reportName', 'report')
    file_name = f'{report_name}.pdf'

    template_vars = request_json.get('templateVars')
    if template_vars is None:
        # Previously a bare KeyError, which surfaced as a 500.
        abort(HTTPStatus.BAD_REQUEST, 'templateVars is required')
    populate_page_number = bool(request_json.get('populatePageNumber', None))

    report = None
    if 'templateName' in request_json:
        try:
            report = ReportService.create_report_from_stored_template(
                request_json['templateName'], template_vars, populate_page_number
            )
        except TemplateNotFound:
            abort(HTTPStatus.NOT_FOUND, 'Template not found')
        except ValueError as e:
            abort(HTTPStatus.BAD_REQUEST, str(e))
    elif 'template' in request_json:
        report = ReportService.create_report_from_template(
            request_json['template'], template_vars, populate_page_number
        )

    return report, file_name


def _create_response(report, file_name, content_type):
    """Wrap ``report`` in a streaming attachment response.

    Aborts with 400 when ``report`` is ``None``. For ``text/csv`` the report is
    streamed as-is (it is expected to be an iterable of chunks); any other
    content type is sent as a single chunk.
    """
    if report is None:
        abort(HTTPStatus.BAD_REQUEST, 'Report cannot be generated')

    if content_type == 'text/csv':
        response_data = stream_with_context(report)
    else:
        def single_chunk():
            yield report
        response_data = stream_with_context(single_chunk())

    response = Response(response_data, mimetype=content_type)
    # Let Werkzeug quote/escape the client-supplied file name instead of
    # interpolating it into the header value by hand.
    response.headers.set('Content-Disposition', 'attachment', filename=file_name)
    return response
- if 'templateName' in request_json: # Ignore template if template_name is present - template_name = request_json['templateName'] - try: - report = ReportService.create_report_from_stored_template(template_name, template_vars, - populate_page_number) - except TemplateNotFound: - abort(HTTPStatus.NOT_FOUND, 'Template not found') - - elif 'template' in request_json: - report = ReportService.create_report_from_template(request_json['template'], template_vars, - populate_page_number) - - if report is not None: - response = Response(report, 200) - response.headers.set('Content-Disposition', 'attachment', filename=file_name) - response.headers.set('Content-Type', response_content_type) - + report, file_name = _generate_csv_report(request_json) else: - abort(HTTPStatus.BAD_REQUEST, 'Report cannot be generated') - - return response + report, file_name = _generate_pdf_report(request_json) + return _create_response(report, file_name, response_content_type) diff --git a/report-api/src/api/resources/templates.py b/report-api/src/api/resources/templates.py index b8600f7d..522a2126 100644 --- a/report-api/src/api/resources/templates.py +++ b/report-api/src/api/resources/templates.py @@ -43,4 +43,6 @@ def get(): response.headers.set('Content-Type', 'application/html') except TemplateNotFound: abort(HTTPStatus.NOT_FOUND, 'Template not found') + except ValueError as e: + abort(HTTPStatus.BAD_REQUEST, str(e)) return response diff --git a/report-api/src/api/services/chunk_report_service.py b/report-api/src/api/services/chunk_report_service.py index 0cab3901..78db2825 100644 --- a/report-api/src/api/services/chunk_report_service.py +++ b/report-api/src/api/services/chunk_report_service.py @@ -27,7 +27,7 @@ from api.services.footer_service import add_page_numbers_to_pdf from api.services.gotenberg_service import GotenbergService -from api.utils.util import TEMPLATE_FOLDER_PATH +from api.utils.util import TEMPLATE_FOLDER_PATH, sanitize_template_name class ChunkReportService: # 
class CsvService:  # pylint: disable=too-few-public-methods
    """Service that renders tabular payloads as CSV."""

    @classmethod
    def create_report(cls, payload: Dict) -> Iterator[bytes]:
        """Yield a CSV document row by row as UTF-8 encoded chunks.

        ``payload['columns']`` is written as the header row and each entry of
        ``payload['values']`` as one data row. Nothing is yielded when
        ``columns`` is missing or empty. A missing or null ``values`` produces
        a header-only document, because an exception raised while streaming
        would surface after the response headers were already sent.
        """
        columns = payload.get('columns', None)
        if not columns:
            return
        rows = payload.get('values') or ()

        buffer = io.StringIO()
        writer = csv.writer(buffer)

        def encode(record) -> bytes:
            """Serialize one record and reset the shared buffer for reuse."""
            writer.writerow(record)
            chunk = buffer.getvalue().encode('utf-8')
            buffer.seek(0)
            buffer.truncate(0)
            return chunk

        yield encode(columns)
        for row in rows:
            yield encode(row)
TEMPLATE_FOLDER_PATH = 'report-templates/'


def sanitize_template_name(template_name: str) -> str:
    """Return ``template_name`` reduced to ``[a-zA-Z0-9_-]`` characters.

    The raw name is rejected outright when it is empty, not a string, or
    contains ``..``, ``/`` or ``\\``. Any other character outside the allowed
    set is stripped.

    Raises:
        ValueError: if the name is empty, not a string, contains path
            characters, has no valid characters left after stripping, or
            would resolve outside ``TEMPLATE_FOLDER_PATH``.
    """
    if not template_name:
        raise ValueError('Template name cannot be empty')
    if not isinstance(template_name, str):
        # JSON can deliver numbers or lists here; fail as a validation error
        # rather than a TypeError from the regex below.
        raise ValueError('Template name must be a string')

    # Inspect the raw input first so traversal attempts get the specific error.
    if '..' in template_name or '/' in template_name or '\\' in template_name:
        raise ValueError('Template name contains invalid path characters')

    sanitized = re.sub(r'[^a-zA-Z0-9_-]', '', template_name)
    if not sanitized:
        raise ValueError('Template name contains no valid characters')

    # Defense in depth: compare whole path components, not string prefixes,
    # so a sibling such as "report-templates-x" can never pass.
    base_dir = os.path.abspath(TEMPLATE_FOLDER_PATH)
    final_path = os.path.abspath(os.path.join(TEMPLATE_FOLDER_PATH, f'{sanitized}.html'))
    if os.path.commonpath([base_dir, final_path]) != base_dir:
        raise ValueError('Template path traversal detected')

    return sanitized
def test_response_is_streaming(client, jwt, app, mock_gotenberg_requests):
    """Verify that PDF response is streaming (no Content-Length header set)."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)
    headers = {'Authorization': f'Bearer {token}', 'content-type': 'application/json'}

    request_url = '/api/v1/reports'
    request_data = {
        'templateName': 'invoice',
        'templateVars': {'title': 'This is a sample request'},
        'reportName': 'sample'
    }

    rv = client.post(request_url, data=json.dumps(request_data), headers=headers)
    assert rv.status_code == 200
    assert rv.content_type == 'application/pdf'
    assert 'attachment' in rv.headers['Content-Disposition']
    assert 'Content-Length' not in rv.headers
    assert rv.data.startswith(b'%PDF')


def test_csv_response_is_streaming(client, jwt, app):
    """Verify that CSV response is streaming."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)
    headers = {
        'Authorization': f'Bearer {token}',
        'content-type': 'application/json',
        'Accept': 'text/csv'
    }
    request_url = '/api/v1/reports'
    request_data = {
        'reportName': 'test',
        'templateVars': {
            'columns': ['a', 'b', 'c'],
            'values': [['1', '2', '3'], ['4', '5', '6']]
        }
    }

    rv = client.post(request_url, data=json.dumps(request_data), headers=headers)
    assert rv.status_code == 200
    assert rv.content_type.startswith('text/csv')
    assert 'attachment' in rv.headers['Content-Disposition']
    assert 'Content-Length' not in rv.headers
    # The header row must be the first thing in the streamed body.
    assert rv.data.startswith(b'a,b,c')


def test_gzip_request_compression(client, jwt, app, mock_gotenberg_requests):
    """Verify that GZIP compressed request body is properly decompressed and processed."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)

    request_url = '/api/v1/reports'
    request_data = {
        'templateName': 'invoice',
        'templateVars': {'title': 'This is a GZIP compressed request'},
        'reportName': 'gzip_test'
    }
    compressed_data = gzip.compress(json.dumps(request_data).encode('utf-8'))

    headers = {
        'Authorization': f'Bearer {token}',
        'content-type': 'application/json',
        'Content-Encoding': 'gzip'
    }

    rv = client.post(request_url, data=compressed_data, headers=headers)

    assert rv.status_code == 200
    assert rv.content_type == 'application/pdf'
    assert len(rv.data) > 0


def test_gzip_request_compression_csv(client, jwt, app):
    """Verify that GZIP compressed request body works for CSV reports."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)

    request_url = '/api/v1/reports'
    request_data = {
        'reportName': 'gzip_csv_test',
        'templateVars': {
            'columns': ['col1', 'col2'],
            'values': [['val1', 'val2']]
        }
    }
    compressed_data = gzip.compress(json.dumps(request_data).encode('utf-8'))

    headers = {
        'Authorization': f'Bearer {token}',
        'content-type': 'application/json',
        'Content-Encoding': 'gzip',
        'Accept': 'text/csv'
    }

    rv = client.post(request_url, data=compressed_data, headers=headers)

    assert rv.status_code == 200
    assert rv.content_type.startswith('text/csv')
    assert b'col1,col2' in rv.data
    assert b'val1,val2' in rv.data


def test_gzip_request_invalid_compression(client, jwt, app):
    """Verify that invalid GZIP data returns appropriate error."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)

    request_url = '/api/v1/reports'
    invalid_compressed_data = b'not valid gzip data'

    headers = {
        'Authorization': f'Bearer {token}',
        'content-type': 'application/json',
        'Content-Encoding': 'gzip'
    }

    rv = client.post(request_url, data=invalid_compressed_data, headers=headers)

    assert rv.status_code == 400
    response_data = rv.get_json()
    assert response_data is not None
    assert 'message' in response_data
    assert 'Failed to decompress' in response_data['message']


def test_template_name_sanitization(client, jwt, app, mock_gotenberg_requests):
    """Verify that template names are sanitized to prevent path traversal attacks."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)
    headers = {'Authorization': f'Bearer {token}', 'content-type': 'application/json'}

    request_url = '/api/v1/reports'
    request_data = {
        'templateName': '../../etc/passwd',
        'templateVars': {'title': 'Test'},
        'reportName': 'test'
    }

    rv = client.post(request_url, data=json.dumps(request_data), headers=headers)
    assert rv.status_code == 400
    response_data = rv.get_json()
    assert response_data is not None
    assert 'message' in response_data
    message_lower = response_data['message'].lower()
    assert 'invalid path characters' in message_lower or 'path traversal' in message_lower


def test_template_name_sanitization_invalid_chars(client, jwt, app, mock_gotenberg_requests):
    """Verify that a template name made only of invalid characters is rejected."""
    token = jwt.create_jwt(get_claims(app_request=app), token_header)
    headers = {'Authorization': f'Bearer {token}', 'content-type': 'application/json'}

    request_url = '/api/v1/reports'
    # Must NOT be a valid name such as 'invoice': that request succeeds (see
    # test_response_is_streaming). Every character here is stripped by the
    # sanitizer, leaving nothing, which is reported as a 400.
    request_data = {
        'templateName': '!@#$%^&*()',
        'templateVars': {'title': 'Test'},
        'reportName': 'test'
    }

    rv = client.post(request_url, data=json.dumps(request_data), headers=headers)
    assert rv.status_code == 400
try: + next(csv_report) + assert False, 'Generator should be empty' + except StopIteration: + pass