diff --git a/.gitignore b/.gitignore index 0ae86e0..e888a89 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .ipynb_checkpoints build/ *.egg-info/ +__pycache__/ .pyc diff --git a/dist/neonutilities-1.1.1-py3-none-any.whl b/dist/neonutilities-1.1.1-py3-none-any.whl index 070b5b4..28885e5 100644 Binary files a/dist/neonutilities-1.1.1-py3-none-any.whl and b/dist/neonutilities-1.1.1-py3-none-any.whl differ diff --git a/dist/neonutilities-1.1.1.tar.gz b/dist/neonutilities-1.1.1.tar.gz index 43819ea..e9ded00 100644 Binary files a/dist/neonutilities-1.1.1.tar.gz and b/dist/neonutilities-1.1.1.tar.gz differ diff --git a/requirements.txt b/requirements.txt index a9cd6f4..874cd35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ pyarrow>=16.0.0 pyproj>=3.6.1 requests>=2.31.0 tqdm>=4.66.2 +google_crc32c>=1.7.0 \ No newline at end of file diff --git a/src/neonutilities/__init__.py b/src/neonutilities/__init__.py index 7132757..4ee09e9 100644 --- a/src/neonutilities/__init__.py +++ b/src/neonutilities/__init__.py @@ -12,5 +12,4 @@ stack_by_table, load_by_product, dataset_query, -) - +) \ No newline at end of file diff --git a/src/neonutilities/__pycache__/__init__.cpython-311.pyc b/src/neonutilities/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index b304059..0000000 Binary files a/src/neonutilities/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/aop_download.cpython-311.pyc b/src/neonutilities/__pycache__/aop_download.cpython-311.pyc deleted file mode 100644 index 45a7e0d..0000000 Binary files a/src/neonutilities/__pycache__/aop_download.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/citation.cpython-311.pyc b/src/neonutilities/__pycache__/citation.cpython-311.pyc deleted file mode 100644 index b5d53d2..0000000 Binary files a/src/neonutilities/__pycache__/citation.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/files_by_uri.cpython-311.pyc b/src/neonutilities/__pycache__/files_by_uri.cpython-311.pyc deleted file mode 100644 index a61e63f..0000000 Binary files a/src/neonutilities/__pycache__/files_by_uri.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/get_issue_log.cpython-311.pyc b/src/neonutilities/__pycache__/get_issue_log.cpython-311.pyc deleted file mode 100644 index c4eb540..0000000 Binary files a/src/neonutilities/__pycache__/get_issue_log.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/read_table_neon.cpython-311.pyc b/src/neonutilities/__pycache__/read_table_neon.cpython-311.pyc deleted file mode 100644 index fdd4465..0000000 Binary files a/src/neonutilities/__pycache__/read_table_neon.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/tabular_download.cpython-311.pyc b/src/neonutilities/__pycache__/tabular_download.cpython-311.pyc deleted file mode 100644 index 424b40a..0000000 Binary files a/src/neonutilities/__pycache__/tabular_download.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/__pycache__/unzip_and_stack.cpython-311.pyc b/src/neonutilities/__pycache__/unzip_and_stack.cpython-311.pyc deleted file mode 100644 index a3ed8da..0000000 Binary files a/src/neonutilities/__pycache__/unzip_and_stack.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/aop_download.py b/src/neonutilities/aop_download.py index b575e55..ef5b047 100644 --- a/src/neonutilities/aop_download.py +++ 
b/src/neonutilities/aop_download.py @@ -15,23 +15,29 @@ @author: Claire Lunch (clunch@battelleecology.org) @author: Christine Laney (claney@battelleecology.org) +Updated Oct 2025 to include options to skip files if they already exist locally and +to validate existing files against files published on the Data Portal with checksums. + """ # %% imports from datetime import datetime +import glob import importlib_resources import logging import pandas as pd import numpy as np import os import re + +# import time from time import sleep from tqdm import tqdm # local imports from . import __resources__ from .helper_mods.api_helpers import get_api -from .helper_mods.api_helpers import download_file +from .helper_mods.api_helpers import download_file, calculate_crc32c from .helper_mods.metadata_helpers import convert_byte_size from .get_issue_log import get_issue_log from .citation import get_citation @@ -104,8 +110,8 @@ def get_file_urls(urls, token=None): Notes -------- - The function makes API calls to each URL in the 'urls' list and retrieves the file information. - It also retrieves the release information from the response JSON. + The function makes API calls to each URL in the 'urls' list and retrieves the file + information. It also retrieves the release information from the response JSON. If the API call fails, it prints a warning message and continues with the next URL. """ @@ -206,7 +212,9 @@ def get_neon_sites(): neon_sites_df = pd.read_csv(neon_sites_file) neon_sites_list = list(neon_sites_df["field_site_id"]) - neon_sites_list.append("CHEQ") + neon_sites_list.append( + "CHEQ" + ) # append the CHEQ site, a separate flight area than STEI-TREE return neon_sites_list @@ -217,6 +225,52 @@ def get_data_product_name(dpid): return product_name +def check_exists_and_checksum(row, download_path): + """ + Check if a file exists locally and if its CRC32C checksum matches the expected value. + Used in the skip_if_exists option of by_tile_aop and by_file_aop. + + Parameters + ---------- + row : pandas.Series + A row from a DataFrame containing at least a 'url' and optionally a 'crc32c' field. + download_path : str + The root directory where files are downloaded. + + Returns + ------- + pandas.Series + A Series of two boolean values: + - exists_locally: True if the file exists locally, False otherwise. + - checksum_matches: True if the local file's CRC32C matches the expected value, False otherwise. + + Example + ------- + >>> file_url_df[["exists_locally", "checksum_matches"]] = file_url_df.apply( + ... lambda row: check_exists_and_checksum(row, download_path), axis=1 + ... 
) """ # print(row["url"]) pathparts = row["url"].split("/") file_path = os.path.join(download_path, *pathparts[3:]) # file_path = os.path.join(download_path,"\\".join(pathparts[3:])) # print(file_path) exists = os.path.exists(file_path) # print(exists) if not exists: # print(f'{pathparts[-1:]} does not exist!') return pd.Series([False, False]) expected_crc32c = row.get("crc32c") # print(expected_crc32c) if expected_crc32c: local_crc32c = calculate_crc32c(file_path) matches = local_crc32c.zfill(8) == str(expected_crc32c).zfill(8) # return pd.Series({"exists_locally": True, "checksum_matches": matches}) return pd.Series([True, matches]) return pd.Series([True, False]) # return pd.Series({"exists_locally": True, "checksum_matches": False}) + + # %% functions to validate inputs for by_file_aop and by_tile_aop @@ -356,7 +410,7 @@ def validate_aop_l3_dpid(dpid): def check_field_spectra_dpid(dpid): if dpid == "DP1.30012.001": raise ValueError( - f"NEON {dpid} is the Field spectral data product, which is published as tabular data. Use zipsByProduct() or loadByProduct() to download these data." + f"NEON {dpid} is the Field spectral data product, which is published as tabular data. Use zips_by_product() or load_by_product() to download these data." ) @@ -402,6 +456,28 @@ def validate_year(year): ) +def validate_overwrite(overwrite): + """ + Validates that overwrite is one of the accepted options: 'yes', 'no', or 'prompt'. + Raises a ValueError if not. + """ + valid_options = {"yes", "no", "prompt"} + if overwrite not in valid_options: + raise ValueError(f"overwrite must be one of {valid_options}. ") + + +def validate_skip_if_exists(skip_if_exists): + """ + Validates that skip_if_exists is a boolean (True or False). + Raises a ValueError if not. + """ + if not isinstance(skip_if_exists, bool): + raise ValueError( + f"skip_if_exists must be a boolean (True or False). " + f"Received: '{skip_if_exists}' of type {type(skip_if_exists)}" + ) + + def check_aop_dpid(response_dict, dpid): if response_dict["data"]["productScienceTeamAbbr"] != "AOP": logging.info( @@ -678,6 +754,9 @@ def by_file_aop( savepath=None, chunk_size=1024, token=None, + verbose=False, + skip_if_exists=False, + overwrite="prompt", ): """ This function queries the NEON API for AOP data by site, year, and product, and downloads all @@ -712,23 +791,51 @@ def by_file_aop( Size in bytes of chunk for chunked download. Defaults to 1024. token: str, optional - User-specific API token from data.neonscience.org user account. See - https://data.neonscience.org/data-api/rate-limiting/ for details about - API rate limits and user tokens. + User-specific API token from data.neonscience.org user account. Defaults to None. + See https://data.neonscience.org/data-api/rate-limiting/ for details about API + rate limits and user tokens. + + verbose: bool, optional + If set to True, the function will print more detailed information about the download process. + + skip_if_exists: bool, optional + If set to True, the function will skip downloading files that already exist in the + savepath and are valid (local checksums match the checksums of the published file). + Defaults to False. If any local file checksums don't match those of files published + on the NEON Data Portal, the user will be prompted to skip these files or overwrite + the existing files with the new ones (see overwrite input).
+ + overwrite: str, optional + Must be one of: + 'yes' - overwrite mismatched files without prompting, + 'no' - don't overwrite mismatched files (skip them, no prompt), + 'prompt' - prompt the user (y/n) to overwrite mismatched files after displaying them (default). + If skip_if_exists is False, this parameter is ignored, and any existing files in + the savepath will be overwritten according to the function's default behavior. Returns -------- - None; data are downloaded to the local directory specified. + None; data are downloaded to the directory specified (savepath) or the current working directory. + If data already exist in the expected path, they will be overwritten by default. To check for + existing files before downloading, set skip_if_exists=True along with an overwrite option (y/n/prompt). Examples -------- - >>> by_file_aop(dpid="DP3.30015.001", site="MCRA", year="2021", savepath="./test_download") - # This will download 2021 canopy height model data from McRae Creek to the './test_download' directory. + >>> by_file_aop(dpid="DP3.30015.001", + site="MCRA", + year="2021", + savepath="./test_download", + skip_if_exists=True) + # This downloads the 2021 Canopy Height Model data from McRae Creek to the './test_download' directory. + # If any files already exist in the savepath, they will be checked and skipped if they are valid. + # The user will be prompted to overwrite or skip downloading any existing files that do not match + # the latest published data on the NEON Data Portal. Notes -------- - The function creates a folder in the 'savepath' directory, containing all AOP files meeting the query criteria. - If 'savepath' is not provided, data are downloaded to the working directory. + This function creates a folder named by the Data Product ID (DPID; e.g. DP3.30015.001) in the + 'savepath' directory, containing all AOP files meeting the query criteria. If 'savepath' is + not provided, data are downloaded to the working directory, in a folder named by the DPID. """ # raise value error and print message if dpid isn't formatted as expected @@ -751,6 +858,19 @@ def by_file_aop( year = str(year) # cast year to string (if it's not already) validate_year(year) + # raise value error and print message if skip_if_exists input is not valid (boolean) + validate_skip_if_exists(skip_if_exists) + + # raise value error and print message if overwrite input is not valid (yes, no, prompt) + validate_overwrite(overwrite) + + # warn if overwrite is set but skip_if_exists is False + if not skip_if_exists and overwrite != "prompt": + logging.info( + "WARNING: overwrite option only applies if skip_if_exists=True. " + "By default, any existing files will be overwritten unless you select skip_if_exists=True and overwrite='no' or 'prompt' (default)." + ) + + # if token is an empty string, set to None if token == "": token = None @@ -766,8 +886,6 @@ def by_file_aop( # check that token was used if token and "x-ratelimit-limit" in response.headers: check_token(response) - # if response.headers['x-ratelimit-limit'] == '200': - # print('API token was not recognized.
Public rate limit applied.\n') # get the request response dictionary response_dict = response.json() @@ -794,7 +912,6 @@ def by_file_aop( # get the number of files in the dataframe, if there are no files to download, return if len(file_url_df) == 0: - # print("No data files found.") logging.info("No NEON data files found.") return @@ -806,12 +923,10 @@ def by_file_aop( ) else: # log provisional not included message and filter to the released data - # logging.info( - # "Provisional data are not included. To download provisional data, use input parameter include_provisional=True.") file_url_df = file_url_df[file_url_df["release"] != "PROVISIONAL"] if len(file_url_df) == 0: logging.info( - "NEON Provisional data are not included. To download provisional data, use input parameter include_provisional=True." + "Provisional NEON data are not included. To download provisional data, use input parameter include_provisional=True." ) num_files = len(file_url_df) @@ -834,9 +949,9 @@ def by_file_aop( f"Continuing will download {num_files} NEON data files totaling approximately {download_size}. Do you want to proceed? (y/n) " ) .strip() - .lower() + .lower() # lower or upper case 'y' will work != "y" - ): # lower or upper case 'y' will work + ): print("Download halted.") return @@ -849,15 +964,212 @@ def by_file_aop( # serially download all files, with progress bar files = list(file_url_df["url"]) - print( - f"Downloading {num_files} NEON data file(s) totaling approximately {download_size}\n" - ) - sleep(1) - for file in tqdm(files): - download_file( - url=file, savepath=download_path, chunk_size=chunk_size, token=token + + # different messages depending on whether skip_if_exists is True or False + if skip_if_exists: + logging.info( + f"Found {num_files} NEON data files totaling approximately {download_size}.\n" + "Files in savepath will be checked and skipped if they exist and match the latest version." + ) + else: + logging.info( + f"Downloading {num_files} NEON data files totaling approximately {download_size}\n" + ) + # verbose option to list all files being downloaded + if verbose: + logging.info("Downloading the following data and metadata files:") + for file in files: + logging.info(file.replace("https://storage.googleapis.com", f"{dpid}")) + + if skip_if_exists: + # Existence and checksum check + file_url_df[["exists_locally", "checksum_matches"]] = file_url_df.apply( + lambda row: check_exists_and_checksum(row, download_path), axis=1 + ) + + # Handle the various cases + # 1. Skip files that exist locally and checksums match those on the GCS (exists_locally and checksum_matches are both True). + # 2. Prompt the user to decide whether to download (overwrite) files that exist locally but checksums don't match (exists_locally is True and checksum_matches is False). + # 3. Download files if they don't already exist locally (exists_locally is False). + # 4. If there are extra files locally that don't exist on the GCS, display a warning message. 
+ + # Skipped files + files_to_skip = file_url_df[ + (file_url_df["exists_locally"]) & (file_url_df["checksum_matches"]) + ] + + # Files to prompt for overwrite + mismatched_files = file_url_df[ + (file_url_df["exists_locally"]) & (~file_url_df["checksum_matches"]) + ] + + # Files to download (do not exist locally) + files_to_download = file_url_df[~file_url_df["exists_locally"]] + + # Identify README files (case-insensitive, .txt) + readme_files = file_url_df[ + file_url_df["name"].str.contains("readme", case=False, na=False) + ] + + # If any files are missing or mismatched, download the README file + if ( + not files_to_download.empty or not mismatched_files.empty + ) and not readme_files.empty: + logging.info("Downloading README file") + for idx, row in tqdm(readme_files.iterrows(), total=len(readme_files)): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + + # display a warning message if there are extra files locally that + # are not published on the Portal as part of the Data Product + + # get all expected file paths (relative to download_path) + expected_files = set() + for _, row in file_url_df.iterrows(): + pathparts = row["url"].split("/") + expected_files.add( + os.path.normpath(os.path.join(download_path, *pathparts[3:])) + ) + + # get the set of all AOP bucket names from the URLs + gcs_bucket = { + url.split("/")[3] + for url in file_url_df["url"] + if len(url.split("/")) > 3 + and url.split("/")[3] + in ["neon-aop-products", "neon-aop-provisional-products"] + } + + # get the domain from the URLs + domain = { + val + for url in file_url_df["url"] + for val in [url.split("/")[6]] + if re.fullmatch(r"D\d{2}", val) + } + + # get the Year_Site_Visit from the URLs + ysv = { + val + for url in file_url_df["url"] + for val in [url.split("/")[7]] + if re.fullmatch(r"\d{4}_[A-Z]{4}_\d+", val) + } + + # recursively find all files in the expected path + local_path = os.path.normpath( + os.path.join( + download_path, + gcs_bucket.pop(), + str(year), + "FullSite", + domain.pop(), + ysv.pop(), + ) + ) + + # get the set of all local files (normalized paths) + all_local_files = set( + os.path.normpath(f) + for f in glob.glob(os.path.join(local_path, "**"), recursive=True) + if os.path.isfile(f) ) + # find extra files and display warning message if any exist + extra_files = sorted(all_local_files - expected_files) + if extra_files: + logging.info( + f"WARNING: Files found in the local folder that do not exist in the latest version of data for {dpid}. " + f"\nYou will need to delete the extra files below if you want the download folder to match the latest available data contents," + f"\nor to be safe, delete the folder {local_path} and re-download." + "\nExtra Files Found:" + ) + for f in sorted(extra_files): + logging.info(f" {os.path.abspath(f)}") + + # files that do not exist locally + if not files_to_download.empty: + logging.info( + "The following files will be downloaded (they do not already exist locally):" + ) + for f in files_to_download["name"].sort_values(): + logging.info(f" {f}") + + # Download files that do not exist locally + for _, row in tqdm( + files_to_download.iterrows(), total=len(files_to_download) + ): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + + if not files_to_skip.empty: + logging.info( + "The remainder of the files in savepath will not be downloaded. " + "They already exist locally and match the latest available data." 
+ ) + + # mismatched files (local checksums are not the same as those on the Data Portal) + # filter out files where the name contains "readme" (case-insensitive) + # readme files do not have a checksum so would always fail, these should be downloaded by default + mismatched_files_no_readme = mismatched_files[ + ~mismatched_files["name"].str.contains("readme", case=False, na=False) + ] + + # message if there is nothing to download - all files exist and they all match the checksums + if files_to_download.empty and mismatched_files_no_readme.empty: + logging.info( + "All files already exist locally and match the latest available data. Skipping download." + ) + + if not mismatched_files_no_readme.empty: + logging.info( + "The following files exist locally but have a different checksum than the remote files:" + ) + for f in sorted(mismatched_files_no_readme["name"]): + logging.info(f" {f}") + # determine whether to overwrite mismatched files + if overwrite == "yes": + response = "y" + elif overwrite == "no": + response = "n" + # logging.info("WARNING: Files where the checksum doesn't match the latest version will not be overwritten.") + else: # 'prompt' or any other value + response = ( + input( + "Do you want to overwrite these files with the latest version? (y/n) " + ) + .strip() + .lower() + ) + if response.lower() == "y": + logging.info("Overwriting these files with the latest available data.") + # download including the readme + for _, row in tqdm( + mismatched_files.iterrows(), total=len(mismatched_files) + ): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + else: + logging.info("Skipped overwriting files with mismatched checksums.") + + else: + for file in tqdm(files): + download_file( + url=file, savepath=download_path, chunk_size=chunk_size, token=token + ) + # download issue log table ilog = get_issue_log(dpid=dpid, token=None) if ilog is not None: @@ -911,6 +1223,8 @@ def by_tile_aop( chunk_size=1024, token=None, verbose=False, + skip_if_exists=False, + overwrite="prompt", ): """ This function queries the NEON API for AOP data by site, year, product, and @@ -929,13 +1243,13 @@ def by_tile_aop( year: str or int The four-digit year of data collection. - easting: int or list of int + easting: float/int or list of float/int A number or list containing the easting UTM coordinate(s) of the locations to download. - northing: int or list of int + northing: float/int or list of float/int A number or list containing the northing UTM coordinate(s) of the locations to download. - buffer: int, optional + buffer: float/int, optional Size, in meters, of the buffer to be included around the coordinates when determining which tiles to download. Defaults to 0. include_provisional: bool, optional @@ -956,26 +1270,55 @@ def by_tile_aop( Size in bytes of chunk for chunked download. Defaults to 1024. token: str, optional - User-specific API token from data.neonscience.org user account. See - https://data.neonscience.org/data-api/rate-limiting/ for details about + User-specific API token from data.neonscience.org user account. Defaults to None. + See https://data.neonscience.org/data-api/rate-limiting/ for details about API rate limits and user tokens. verbose: bool, optional - If set to True, the function will print out a list of the downloaded tiles before downloading. + If set to True, the function will print out a list of the tiles to be downloaded before downloading. Defaults to False. 
- Return + skip_if_exists: bool, optional + If set to True, the function will skip downloading files that already exist in the + savepath and are valid (local checksums match the checksums of the published file). + Defaults to False. If any local file checksums don't match those of files published + on the NEON Data Portal, the user will be prompted to skip these files or overwrite + the existing files with the new ones (see overwrite input). + + overwrite: str, optional + Must be one of: + 'yes' - overwrite mismatched files without prompting, + 'no' - don't overwrite mismatched files (skip them, no prompt), + 'prompt' - prompt the user (y/n) to overwrite mismatched files after displaying them (default). + If skip_if_exists is False, this parameter is ignored, and any existing files in + the savepath will be overwritten according to the function's default behavior. + + Returns -------- - None; data are downloaded to the local directory specified. + None; data are downloaded to the directory specified (savepath) or the current working directory. + If data already exist in the expected path, they will be overwritten by default. To check for + existing files before downloading, set skip_if_exists=True along with an overwrite option (y/n/prompt). Example -------- - >>> by_tile_aop(dpid="DP3.30015.001", site="MCRA", - easting=[566456, 566639], northing=[4900783, 4901094], - year="2021", savepath="../../test_download") - # This will download any tiles overlapping the specified UTM coordinates for + >>> by_tile_aop(dpid="DP3.30015.001", + site="MCRA", + easting=[566456, 566639], + northing=[4900783, 4901094], + year="2021", + savepath="./test_download", + skip_if_exists=True) + # This downloads any tiles overlapping the specified UTM coordinates for # 2021 canopy height model data from McRae Creek to the './test_download' directory. + # If any files already exist in the savepath, they will be checked and skipped if they are valid. + # The user will be prompted to overwrite or skip downloading any existing files that do not match + # the latest published data on the NEON Data Portal. + Notes + -------- + This function creates a folder named by the Data Product ID (DPID; e.g. DP3.30015.001) in the + 'savepath' directory, containing all AOP files meeting the query criteria. If 'savepath' is + not provided, data are downloaded to the working directory, in a folder named by the DPID. """ # raise value error and print message if dpid isn't formatted as expected @@ -998,6 +1341,20 @@ def by_tile_aop( year = str(year) # cast year to string (if it's not already) validate_year(year) + # raise value error and print message if skip_if_exists input is not valid (boolean) + validate_skip_if_exists(skip_if_exists) + + # raise value error and print message if overwrite input is not valid (yes, no, prompt) + validate_overwrite(overwrite) + + # warn if overwrite is set to yes or no, but skip_if_exists is False + if not skip_if_exists and overwrite != "prompt": + logging.info( + "WARNING: overwrite option only applies if skip_if_exists=True. By default, " + "any existing files in the expected directory will be overwritten unless " + "you select skip_if_exists=True and overwrite='no' or 'prompt' (default)." + ) + + # convert easting and northing to lists, if they are not already if type(easting) is not list: easting = [easting] @@ -1021,8 +1378,7 @@ def by_tile_aop( ) print(e) - # link easting and northing coordinates - as a list of tuples ?
- # coord_tuples = [(easting[i], northing[i]) for i in range(0, len(easting))] + # link easting and northing coordinates # error message if easting and northing vector lengths don't match (also handles empty/NA cases) # there should not be any strings now that everything has been converted to a float @@ -1047,15 +1403,18 @@ def by_tile_aop( logging.info("No response from NEON API. Check internet connection") return - # # check that token was used + # check that token was used if token and "x-ratelimit-limit" in response.headers: check_token(response) # get the request response dictionary response_dict = response.json() + # error message if dpid is not an AOP data product if response_dict["data"]["productScienceTeamAbbr"] != "AOP": - print(f"NEON {dpid} is not a remote sensing product. Use zipsByProduct()") + logging.info( + f"NEON {dpid} is not a remote sensing product. Use zips_by_product()" + ) return # replace collocated site with the site name it's published under @@ -1108,8 +1467,6 @@ def by_tile_aop( # check that pyproj is installed try: from pyproj import Proj, CRS - - # importlib.import_module('pyproj') except ImportError: logging.info( "Package pyproj is required for this function to work at the NEON BLAN site. Install and re-try." @@ -1171,8 +1528,6 @@ def get_buffer_coords(easting, northing, buffer): return new_coords # get the tiles corresponding to the new coordinates (mins and maxes) - # if verbose: - # print('getting coordinates of tiles, including the buffer') buffer_coords = [] for e, n in zip(easting, northing): buffer_coords.extend(get_buffer_coords(e, n, buffer)) @@ -1207,9 +1562,16 @@ def get_buffer_coords(easting, northing, buffer): file_url_df["name"].str.contains("|".join(coord_strs)) ] + # remove .txt files (README) and create a copy to use for the checksum/existence checks file_url_df_subset2 = file_url_df_subset[ ~file_url_df_subset["name"].str.endswith(".txt") - ] + ].copy() + + # file_url_df_subset2.reset_index(drop=True,inplace=True) + file_url_df_subset2 = file_url_df_subset2.reset_index(drop=True) + + # print(list(file_url_df_subset2.head(1))) + # print(file_url_df_subset2.head()) # if any coordinates were not included in the data, print a warning message unique_coords_to_download = set( @@ -1223,7 +1585,7 @@ def get_buffer_coords(easting, northing, buffer): coords_not_found = list(set(coord_strs).difference(list(unique_coords_to_download))) if len(coords_not_found) > 0: logging.info( - "Warning: the following coordinates fall outside the bounds of the NEON site, so will not be downloaded:" + "WARNING: the following coordinates fall outside the bounds of the NEON site, so will not be downloaded:" ) for coord in coords_not_found: print(",".join(coord.split("_"))) @@ -1231,7 +1593,7 @@ def get_buffer_coords(easting, northing, buffer): # get the number of files in the dataframe, if there are no files to download, return num_files = len(file_url_df_subset) if num_files == 0: - logging.info(f"Warning: No NEON {dpid} files found.") + logging.info(f"WARNING: No NEON {dpid} files found.") return # get the total size of all the files found @@ -1261,22 +1623,140 @@ def get_buffer_coords(easting, northing, buffer): # print('download path', download_path) os.makedirs(download_path, exist_ok=True) - # serially download all files, with progress bar + # Get the sorted list of the files to download # use the files from the subsetted dataframe files = list(file_url_df_subset["url"]) files.sort() # sort the files for consistent download order - print( - f"Downloading 
{num_files} NEON data file(s) totaling approximately {download_size}\n" - ) - if verbose: - logging.info("Downloading the following data and metadata files:") - for file in files: - logging.info(file.replace("https://storage.googleapis.com", f"{dpid}")) - sleep(1) - for file in tqdm(files): - download_file( - url=file, savepath=download_path, chunk_size=chunk_size, token=token + + # different messages depending on whether skip_if_exists is True or False + if skip_if_exists: + logging.info( + f"Found {num_files} NEON data files totaling approximately {download_size}.\n" + "Files in savepath will be checked and skipped if they exist and match the latest version." + ) + else: + logging.info( + f"Downloading {num_files} NEON data files totaling approximately {download_size}\n" ) + if verbose: + logging.info("Downloading the following data and metadata files:") + for file in files: + logging.info(file.replace("https://storage.googleapis.com", f"{dpid}")) + + if skip_if_exists: + # Check which files already exist locally and if their checksums match + file_url_df_subset2[ + ["exists_locally", "checksum_matches"] + ] = file_url_df_subset2.apply( + lambda row: check_exists_and_checksum(row, download_path), axis=1 + ) + + # Files to skip (already exist locally and checksums match) + files_to_skip = file_url_df_subset2[ + (file_url_df_subset2["exists_locally"]) + & (file_url_df_subset2["checksum_matches"]) + ] + + # Files to prompt for overwrite (exists locally but checksums don't match) + mismatched_files = file_url_df_subset2[ + (file_url_df_subset2["exists_locally"]) + & (~file_url_df_subset2["checksum_matches"]) + ] + + # Files to download (do not exist locally) + files_to_download = file_url_df_subset2[~file_url_df_subset2["exists_locally"]] + # print(file_url_df_subset2[["name","exists_locally","checksum_matches"]]) + + # if verbose: # print these even in non-verbose mode + if not files_to_download.empty: + logging.info( + "The following files will be downloaded (they do not already exist locally):" + ) + for f in files_to_download["name"].sort_values(): + logging.info(f" {f}") + + # Download files that do not exist locally + for _, row in tqdm( + files_to_download.iterrows(), total=len(files_to_download) + ): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + + if not files_to_skip.empty: + logging.info( + "The remainder of the files in savepath will not be downloaded. " + "They already exist locally and match the latest available data." + ) + + # prompt for mismatched files (excluding readme.txt) + mismatched_files_no_readme = mismatched_files[ + ~mismatched_files["name"].str.contains("readme", case=False, na=False) + ] + + # identify README files (case-insensitive, usually .txt), there should only be one of these + readme_files = mismatched_files[ + mismatched_files["name"].str.contains("readme", case=False, na=False) + ] + + # download README files + if not readme_files.empty: + logging.info("Downloading README file") + for _, row in tqdm(readme_files.iterrows(), total=len(readme_files)): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + + # message if there is nothing to download - all files exist and they all match the checksums + if files_to_download.empty and mismatched_files_no_readme.empty: + logging.info( + "All files already exist locally and match the latest available data. Skipping download." 
+ ) + + if not mismatched_files_no_readme.empty: + logging.info( + "The following files exist locally but have a different checksum than the remote files:" + ) + for f in sorted(mismatched_files_no_readme["name"]): + logging.info(f" {f}") + # determine whether to overwrite mismatched files + if overwrite == "yes": + response = "y" + elif overwrite == "no": + response = "n" + else: # 'prompt' or any other value + response = ( + input( + "Do you want to overwrite these files with the latest version? (y/n) " + ) + .strip() + .lower() + ) + + if response.lower() == "y": + logging.info("Overwriting these files with the latest available data.") + for idx, row in tqdm( + mismatched_files.iterrows(), total=len(mismatched_files) + ): + download_file( + url=row["url"], + savepath=download_path, + chunk_size=chunk_size, + token=token, + ) + else: + logging.info("Skipped overwriting files with mismatched checksums.") + else: # if skip_if_exists=False (default behavior) + for file in tqdm(files): + download_file( + url=file, savepath=download_path, chunk_size=chunk_size, token=token + ) # download issue log table ilog = get_issue_log(dpid=dpid, token=None) diff --git a/src/neonutilities/helper_mods/__pycache__/__init__.cpython-311.pyc b/src/neonutilities/helper_mods/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 7d1c64e..0000000 Binary files a/src/neonutilities/helper_mods/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/helper_mods/__pycache__/api_helpers.cpython-311.pyc b/src/neonutilities/helper_mods/__pycache__/api_helpers.cpython-311.pyc deleted file mode 100644 index c3b09cc..0000000 Binary files a/src/neonutilities/helper_mods/__pycache__/api_helpers.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/helper_mods/__pycache__/metadata_helpers.cpython-311.pyc b/src/neonutilities/helper_mods/__pycache__/metadata_helpers.cpython-311.pyc deleted file mode 100644 index 0f35667..0000000 Binary files a/src/neonutilities/helper_mods/__pycache__/metadata_helpers.cpython-311.pyc and /dev/null differ diff --git a/src/neonutilities/helper_mods/api_helpers.py b/src/neonutilities/helper_mods/api_helpers.py index 0c7cc2e..c09d1eb 100644 --- a/src/neonutilities/helper_mods/api_helpers.py +++ b/src/neonutilities/helper_mods/api_helpers.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# import crcmod +import google_crc32c import requests import re import os @@ -626,6 +628,45 @@ def download_urls(url_set, outpath, token=None, progress=True): return None +# def calculate_crc32c(filename): +# """ +# This function calculates the CRC32C checksum of a file. This is designed to use +# in the check_exists_and_checksum function to . + +# Parameters +# ---------- +# filename : str +# Path to the file for which to calculate the CRC32C checksum. + +# Returns +# ------- +# str +# The CRC32C checksum as an 8-character hexadecimal string (zero-padded if necessary). + +# Example +# ------- +# >>> calculate_crc32c("myfile.tif") +# '036d153e' +# """ +# crc32c_func = crcmod.predefined.mkPredefinedCrcFun('crc-32c') +# with open(filename, 'rb') as f: +# return format(crc32c_func(f.read()), '08x') + + +def calculate_crc32c(filename, chunk_size=1024 * 1024): + """ + Faster CRC32C checksum using google-crc32c. 
+ """ + crc32c = google_crc32c.Checksum() + with open(filename, "rb") as f: + while True: + chunk = f.read(chunk_size) + if not chunk: + break + crc32c.update(chunk) + return crc32c.digest().hex().zfill(8) + + def download_file(url, savepath, chunk_size=1024, token=None): """ This function downloads a single file from a Google Cloud Storage URL to a user-specified directory. diff --git a/tests/test_aop_download.py b/tests/test_aop_download.py index 2e350fc..0fc0b84 100644 --- a/tests/test_aop_download.py +++ b/tests/test_aop_download.py @@ -264,7 +264,23 @@ def test_provisional_included_and_data_available_message(self, input_mock): cm.output, ) - # other scenarios- check messages but don't download the data ? + # overwrite warning when skip_if_exists is False + @patch("builtins.input", return_value="n") + def test_by_file_aop_overwrite_warning_when_skip_if_exists_false(self, input_mock): + with self.assertLogs(level="INFO") as cm: + by_file_aop( + dpid="DP3.30015.001", + site="MCRA", + year="2022", + skip_if_exists=False, + overwrite="no", + ) + self.assertIn( + "WARNING: overwrite option only applies if skip_if_exists=True. By default, any existing files will be overwritten unless you select skip_if_exists=True and overwrite='no' or 'prompt' (default).", + "\n".join(cm.output), + ) + + # other scenarios- check messages but don't download the data # provisional not included, and data available # provisional included, and data available