-
Notifications
You must be signed in to change notification settings - Fork 565
Paper: Performing Object Detection on Drone Orthomosaics with Meta's Segment Anything Model (SAM) #1106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 2025
Are you sure you want to change the base?
Paper: Performing Object Detection on Drone Orthomosaics with Meta's Segment Anything Model (SAM) #1106
Changes from 9 commits
862301f
2804b9c
fdf1de4
aa38dbc
d810340
a4a0631
f9cbdac
c2684b9
92daf7e
92b580f
3e0a8ae
62d49d5
3018dfd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
% GDAL/OGR library manual, cited with its Zenodo DOI. | ||
@manual{gdal, | ||
  author = {{GDAL/OGR contributors}}, | ||
  title = {{GDAL/OGR} Geospatial Data Abstraction software Library}, | ||
  organization = {Open Source Geospatial Foundation}, | ||
  year = {2025}, | ||
  doi = {10.5281/zenodo.5884351}, | ||
  url = {https://gdal.org}, | ||
} | ||
|
||
% GeoPandas v0.8.1 software release archived on Zenodo. | ||
@misc{geopandas, | ||
  title = {geopandas/geopandas: v0.8.1}, | ||
  author = {Kelsey Jordahl and Joris Van den Bossche and Martin Fleischmann and | ||
    Jacob Wasserman and James McBride and Jeffrey Gerard and | ||
    Jeff Tratner and Matthew Perry and Adrian Garcia Badaracco and | ||
    Carson Farmer and Geir Arne Hjelle and Alan D. Snow and | ||
    Micah Cochran and Sean Gillies and Lucas Culbertson and | ||
    Matt Bartos and Nick Eubank and maxalbert and | ||
    Aleksey Bilogur and Sergio Rey and Christopher Ren and | ||
    Dani Arribas-Bel and Leah Wasser and Levi John Wolf and | ||
    Martin Journois and Joshua Wilson and Adam Greenhall and | ||
    Chris Holdgraf and Filipe and Fran\c{c}ois Leblanc}, | ||
  version = {v0.8.1}, | ||
  publisher = {Zenodo}, | ||
  month = jul, | ||
  year = {2020}, | ||
  doi = {10.5281/zenodo.3946761}, | ||
  url = {https://doi.org/10.5281/zenodo.3946761}, | ||
} | ||
|
||
% Segment Anything arXiv preprint. The doi field holds the bare DOI (no resolver prefix). | ||
@misc{kirillov23, | ||
  title = {Segment Anything}, | ||
  author = {Alexander Kirillov and Eric Mintun and Nikhila Ravi and Hanzi Mao and Chloe Rolland and Laura Gustafson and Tete Xiao and Spencer Whitehead and Alexander C. Berg and Wan-Yen Lo and Piotr Dollár and Ross Girshick}, | ||
  year = {2023}, | ||
  eprint = {2304.02643}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass = {cs.CV}, | ||
  doi = {10.48550/arXiv.2304.02643}, | ||
  url = {https://arxiv.org/abs/2304.02643}, | ||
} | ||
|
||
% Matplotlib journal article. The doi field holds the bare DOI (no resolver prefix). | ||
@article{matplotlib, | ||
  author = {Hunter, J. D.}, | ||
  title = {Matplotlib: A 2D graphics environment}, | ||
  journal = {Computing in Science \& Engineering}, | ||
  volume = {9}, | ||
  number = {3}, | ||
  pages = {90--95}, | ||
  year = {2007}, | ||
  publisher = {IEEE COMPUTER SOC}, | ||
  doi = {10.1109/MCSE.2007.55}, | ||
  abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.}, | ||
} | ||
|
||
% arXiv preprint on refining building segmentation with SAM. The doi field holds the bare DOI. | ||
@misc{mayladan23, | ||
  title = {Zero-Shot Refinement of Buildings' Segmentation Models using SAM}, | ||
  author = {Ali Mayladan and Hasan Nasrallah and Hasan Moughnieh and Mustafa Shukor and Ali J. Ghandour}, | ||
  year = {2024}, | ||
  eprint = {2310.01845}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass = {cs.CV}, | ||
  doi = {10.48550/arXiv.2310.01845}, | ||
  url = {https://arxiv.org/abs/2310.01845}, | ||
} | ||
|
||
% NumPy article in Nature. Bare DOI; the publication month is given via the month macro | ||
% rather than a separate date field alongside year. | ||
@article{numpy, | ||
  author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, Stéfan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del Río, Jaime Fernández and Wiebe, Mark and Peterson, Pearu and Gérard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, | ||
  title = {Array programming with {NumPy}}, | ||
  journal = {Nature}, | ||
  volume = {585}, | ||
  number = {7825}, | ||
  pages = {357--362}, | ||
  month = sep, | ||
  year = {2020}, | ||
  publisher = {Springer Science and Business Media {LLC}}, | ||
  doi = {10.1038/s41586-020-2649-2}, | ||
} | ||
|
||
% OpenCV library article. Only the product name is brace-protected in the title; | ||
% reference-manager import residue fields have been dropped. | ||
@article{opencv, | ||
  author = {Bradski, G.}, | ||
  title = {The {OpenCV} Library}, | ||
  journal = {Dr. Dobb's Journal of Software Tools}, | ||
  year = {2000}, | ||
} | ||
|
||
|
||
% OpenPyXL Python package, version 3.1.4. | ||
@misc{openpyxl, | ||
  title = {OpenPyXL: A Python library to read/write Excel 2010 xlsx/xlsm/xltx/xltm files}, | ||
  author = {Eric Gazoni and Charlie Clark}, | ||
  howpublished = {Python Package}, | ||
  note = {Version 3.1.4}, | ||
  year = {2024}, | ||
  url = {https://openpyxl.readthedocs.io}, | ||
} | ||
|
||
% arXiv preprint on SAM for remote sensing. The doi field holds the bare DOI (no resolver prefix). | ||
@misc{osco23, | ||
  title = {The Segment Anything Model (SAM) for Remote Sensing Applications: From Zero to One Shot}, | ||
  author = {Lucas Prado Osco and Qiusheng Wu and Eduardo Lopes de Lemos and Wesley Nunes Gonçalves and Ana Paula Marques Ramos and Jonathan Li and José Marcato Junior}, | ||
  year = {2023}, | ||
  eprint = {2306.16623}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass = {cs.CV}, | ||
  doi = {10.48550/arXiv.2306.16623}, | ||
  url = {https://arxiv.org/abs/2306.16623}, | ||
} | ||
|
||
% pandas conference paper. Bare DOI, en-dash page range, whole-word brace protection, | ||
% and editor names in "von Last, First" form. | ||
@inproceedings{pandas2, | ||
  author = {Wes McKinney}, | ||
  title = {Data Structures for Statistical Computing in {Python}}, | ||
  booktitle = {Proceedings of the 9th {Python} in {Science} {Conference}}, | ||
  editor = {van der Walt, St{\'e}fan and Millman, Jarrod}, | ||
  pages = {56--61}, | ||
  year = {2010}, | ||
  doi = {10.25080/Majora-92bf1922-00a}, | ||
} | ||
|
||
% pandas software release archived on Zenodo. The DOI is given in its own field | ||
% as well as in the resolver URL. | ||
@misc{pandas1, | ||
  author = {{The Pandas Development Team}}, | ||
  title = {pandas-dev/pandas: Pandas}, | ||
  month = feb, | ||
  year = {2020}, | ||
  publisher = {Zenodo}, | ||
  version = {latest}, | ||
  doi = {10.5281/zenodo.3509134}, | ||
  url = {https://doi.org/10.5281/zenodo.3509134}, | ||
} | ||
|
||
% Pillow (PIL fork) documentation, cited by URL only. | ||
@misc{pillow, | ||
  author = {Clark, Alex}, | ||
  title = {Pillow (PIL Fork) Documentation}, | ||
  publisher = {readthedocs}, | ||
  year = {2015}, | ||
  url = {https://buildmedia.readthedocs.org/media/pdf/pillow/latest/pillow.pdf}, | ||
} | ||
|
||
% PyTorch paper (arXiv/CoRR). The title uses a plain ASCII hyphen, and the arXiv | ||
% identifier is also given in eprint fields like the other arXiv entries in this file. | ||
@article{pytorch, | ||
  title = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library}, | ||
  author = {Adam Paszke and Sam Gross and Francisco Massa and Adam Lerer and James Bradbury and Gregory Chanan and Trevor Killeen and Zeming Lin and Natalia Gimelshein and Luca Antiga and Alban Desmaison and Andreas Köpf and Edward Yang and Zachary DeVito and Martin Raison and Alykhan Tejani and Sasank Chilamkurthy and Benoit Steiner and Lu Fang and Junjie Bai and Soumith Chintala}, | ||
  journal = {CoRR}, | ||
  volume = {abs/1912.01703}, | ||
  year = {2019}, | ||
  eprint = {1912.01703}, | ||
  archivePrefix = {arXiv}, | ||
  primaryClass = {cs.LG}, | ||
  url = {https://arxiv.org/abs/1912.01703}, | ||
} | ||
|
||
% QGIS desktop GIS, cited as a manual with a corporate author. | ||
@manual{qgis, | ||
  author = {{QGIS Development Team}}, | ||
  title = {QGIS Geographic Information System}, | ||
  organization = {QGIS Association}, | ||
  year = {2021}, | ||
  url = {https://www.qgis.org}, | ||
} | ||
|
||
% Rasterio software. The year field holds the four-digit start year; | ||
% the open-ended range is recorded in note. | ||
@software{rasterio, | ||
  author = {Sean Gillies and others}, | ||
  organization = {Mapbox}, | ||
  title = {Rasterio: geospatial raster I/O for {Python} programmers}, | ||
  year = {2013}, | ||
  note = {2013--present}, | ||
  url = {https://github.com/rasterio/rasterio}, | ||
} | ||
|
||
% Shapely software release, cited with its Zenodo DOI. | ||
@software{shapely, | ||
  title = {Shapely (Version 2.1.1)}, | ||
  author = {Gillies, S. and van der Wel, C. and Van den Bossche, J. and Taves, M. W. and Arnott, J. and Ward, B. C. and others}, | ||
  doi = {10.5281/zenodo.5597138}, | ||
  year = {2025}, | ||
} | ||
|
||
% samgeo article in the Journal of Open Source Software. Bare DOI; the url resolves | ||
% to this article rather than to the separate arXiv preprint 2306.16623. | ||
@article{wu23, | ||
  author = {Wu, Qiusheng and Osco, Lucas P.}, | ||
  title = {samgeo: A Python package for segmenting geospatial data with the Segment Anything Model (SAM)}, | ||
  journal = {Journal of Open Source Software}, | ||
  year = {2023}, | ||
  volume = {8}, | ||
  number = {89}, | ||
  pages = {5663}, | ||
  doi = {10.21105/joss.05663}, | ||
  url = {https://doi.org/10.21105/joss.05663}, | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Project configuration for this paper; `extends` points at the shared ../papers.yml. | ||
version: 1 | ||
extends: ../papers.yml | ||
project: | ||
# Update this to match `scipy-2025-<folder>` the folder should be `<firstname_surname>` | ||
id: scipy-2025-nicholas_mccarty | ||
# Ensure your title is the same as in your `main.md` | ||
title: Performing Object Detection on Drone Orthomosaics with Meta's Segment Anything Model (SAM) | ||
# subtitle: | ||
# NOTE(review): a reviewer asked what the 20% and 400% figures are measured against; | ||
# consider naming the baseline metric (distance to QC points, IoU vs. bounding boxes) here. | ||
description: This article presents a workflow that utilizes SAM's automatic mask generation skill to effectively perform the task of object detection zero-shot on a high-resolution drone orthomosaic. The generated output is 20% more spatially accurate than that produced using proprietary software, with 400% greater IoU. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good question, which indicates that I need to add clarity on this front. We benchmarked our results against the output produced using proprietary software; our output is more spatially accurate (the centers of our detected objects are closer to the QC points) and our polygons cover the actual objects better (our generated mask polygons have 400% greater IoU than the bounding boxes generated using the proprietary software). Will add this to my list of edits. |
||
# Authors should have affiliations, emails and ORCIDs if available | ||
authors: | ||
- name: Nicholas McCarty | ||
email: [email protected] | ||
orcid: 0009-0001-3727-9178 | ||
affiliation: Upskilled Consulting | ||
# Contribution roles claimed by this author | ||
roles: | ||
- Conceptualization | ||
- Methodology | ||
- Investigation | ||
# Keywords describing the paper's subject matter | ||
keywords: | ||
- object detection | ||
- spatial localization | ||
- drone orthomosaic | ||
# Add the abbreviations that you use in your paper here | ||
# NOTE(review): the single entry below looks like a template default; confirm the paper uses it. | ||
abbreviations: | ||
MyST: Markedly Structured Text | ||
# It is possible to explicitly ignore the `doi-exists` check for certain citation keys | ||
error_rules: | ||
- rule: doi-exists | ||
severity: ignore | ||
# Citation keys exempted from the `doi-exists` rule | ||
keys: | ||
- rasterio | ||
- opencv | ||
- openpyxl | ||
- pillow | ||
- pytorch | ||
- qgis | ||
# Export definition: article main.md, format typst, SciPy template, output full_text.pdf | ||
exports: | ||
- id: pdf | ||
format: typst | ||
template: https://github.com/curvenote-templates/scipy.git | ||
article: main.md | ||
output: full_text.pdf |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious why 80% is used as the threshold. Is it a hyperparameter tuned for optimal performance, or a standard practice?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Empirically, through iteration, we found that this threshold produced more useful results.