Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import pandas
import os

import process_report.util as util

Expand Down Expand Up @@ -122,3 +123,20 @@ def export(self):
def export_s3(self, s3_bucket):
s3_bucket.upload_file(self.output_path, self.output_s3_key)
s3_bucket.upload_file(self.output_path, self.output_s3_archive_key)

def fetch(self, s3_bucket=None):
"""Fetches the exported invoice CSV from S3 and loads it into self.data.

If s3_bucket is not provided, uses the default invoice bucket.
The CSV is downloaded to the current working directory using its base filename.
"""
s3_key = self.output_s3_key
local_filename = os.path.basename(s3_key)

if s3_bucket is None:
# Fallback to default bucket utility which also downloads
util.fetch_s3(s3_key)
else:
s3_bucket.download_file(s3_key, local_filename)

self.data = pandas.read_csv(local_filename)
31 changes: 31 additions & 0 deletions process_report/tests/unit/invoices/test_base_invoice.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from unittest import TestCase, mock
import pandas
from typing import TYPE_CHECKING

if TYPE_CHECKING:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the use of this check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING

Using this pattern essentially lets you import types only when running type checkers or using code editors, not during actual program execution. This avoids slow imports or circular dependencies while still keeping type hints working properly.

Copy link
Contributor

@QuanMPhm QuanMPhm Sep 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. That is interesting. People come up with tools for everything.

I'd argue that this change is beyond the scope of this issue. Introducing the use (or in this case, the implication) of type checkers is seperate from adding a fetch function, and should be done in its own PR.

Do note also, by using TYPE_CHECKING here, there's pressure to refactor the entire repo to follow this pattern as well. At least in our repos, whenever a code pattern or tool is proposed to a repo, we will want that feature to be applied uniformly across the entire repo, not just in one-off locations. This generally makes the repo more cohesive and easier to maintain.

That being said. If you'd like to propose using TYPE_CHECKING and encourage the use of type checkers, I would suggest making a new PR for it and ask people on their opinion. I, for one, am ambivalent.

from unittest.mock import MagicMock

from process_report.tests import util as test_utils

Expand All @@ -17,6 +21,33 @@ def test_filter_exported_columns(self):

self.assertTrue(result_invoice.equals(answer_invoice))

@mock.patch("pandas.read_csv")
def test_fetch_with_mock_s3_bucket(self, mock_read_csv: "MagicMock") -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're conditionally importing the MagicMock due to the type hints in this function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Followup on the comment above. I suppose it's fine to keep this type hint string

"""Test that fetch() method loads data correctly when S3 bucket is mocked."""
test_invoice = test_utils.new_base_invoice(
name="TestInvoice", invoice_month="2024-08"
)
expected_data = pandas.DataFrame(
{
"Invoice Month": ["2024-08", "2024-08"],
"Project - Allocation": ["P1", "P2"],
"Manager (PI)": ["[email protected]", "[email protected]"],
"Cost": [100.00, 150.00],
}
)
mock_read_csv.return_value = expected_data

mock_s3_bucket = mock.MagicMock()
mock_s3_bucket.download_file.return_value = None

test_invoice.fetch(mock_s3_bucket)

self.assertIsNotNone(test_invoice.data)
self.assertTrue(test_invoice.data.equals(expected_data))
mock_s3_bucket.download_file.assert_called_once_with(
"Invoices/2024-08/TestInvoice 2024-08.csv", "TestInvoice 2024-08.csv"
)


class TestUploadToS3(TestCase):
@mock.patch("process_report.util.get_invoice_bucket")
Expand Down
Loading