LikeLy-Journey · juliantang324 · May 3, 2020 · Jul 3, 2024 · Jul 3, 2024 · Jul 3, 2024
diff --git a/.gitignore b/.gitignore
@@ -20,6 +20,7 @@ parts/
 sdist/
 var/
 wheels/
+share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
@@ -38,14 +39,17 @@ pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
+.nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
+*.py,cover
 .hypothesis/
 .pytest_cache/
+cover/
 
 # Translations
 *.mo
@@ -55,6 +59,7 @@ coverage.xml
 *.log
 local_settings.py
 db.sqlite3
+db.sqlite3-journal
 
 # Flask stuff:
 instance/
@@ -67,16 +72,51 @@ instance/
 docs/_build/
 
 # PyBuilder
+.pybuilder/
 target/
 
 # Jupyter Notebook
 .ipynb_checkpoints
 
-# pyenv
-.python-version
+# IPython
+profile_default/
+ipython_config.py
 
-# celery beat schedule file
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
 celerybeat-schedule
+celerybeat.pid
 
 # SageMath parsed files
 *.sage.py
@@ -102,6 +142,21 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
 
-# mac os
-__MACOSX/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  This project takes the
+#  more nuclear option (not generally recommended) and ignores the entire .idea folder below.
+.idea/
diff --git a/README.md b/README.md
@@ -1,34 +1,44 @@
 # PyTorch for Semantic Segmentation
+
+## What's new
+- Added the Swin Transformer backbone, based on the detectron2 implementation.
+- Added the UPerNet model, based on the mmsegmentation implementation, along with pretrained checkpoints.
+- Added the MaskFormer model, based on the detectron2 implementation, along with pretrained checkpoints.
+
 ## Introduce
 This repository contains some models for semantic segmentation and the pipeline of training and testing models, 
 implemented in PyTorch.
 
+
 ![](docs/images/demo.png)
 
 ## Model zoo
 
-|Model|Backbone|Datasets|eval size|Mean IoU(paper)|Mean IoU(this repo)|
-|:-:|:-:|:-:|:-:|:-:|:-:|
-|DeepLabv3_plus|xception65|cityscape(val)|(1025,2049)|78.8|[78.93](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_segmentron.pth)|
-|DeepLabv3_plus|xception65|coco(val)|480/520|-|[70.50](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_coco_segmentron.pth)|
-|DeepLabv3_plus|xception65|pascal_aug(val)|480/520|-|[89.56](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_pascal_aug_segmentron.pth)|
-|DeepLabv3_plus|xception65|pascal_voc(val)|480/520|-|[88.39](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_pascal_voc_segmentron.pth)|
-|DeepLabv3_plus|resnet101|cityscape(val)|(1025,2049)|-|[78.27](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_resnet101_segmentron.pth)|
-|Danet|resnet101|cityscape(val)|(1024,2048)|79.9|[79.34](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/danet101_segmentron.pth)|
-|Pspnet|resnet101|cityscape(val)|(1025,2049)|78.63|[77.00](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/pspnet_resnet101_segmentron.pth)|
+|          Model          |  Backbone  |    Datasets     |  eval size  | Mean IoU(paper) |                                                       Mean IoU(this repo)                                                        |
+|:-----------------------:|:----------:|:---------------:|:-----------:|:---------------:|:--------------------------------------------------------------------------------------------------------------------------------:|
+|     DeepLabv3_plus      | xception65 | cityscape(val)  | (1025,2049) |      78.8       |      [78.93](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_segmentron.pth)       |
+|     DeepLabv3_plus      | xception65 |    coco(val)    |   480/520   |        -        |    [70.50](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_coco_segmentron.pth)    |
+|     DeepLabv3_plus      | xception65 | pascal_aug(val) |   480/520   |        -        | [89.56](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_pascal_aug_segmentron.pth) |
+|     DeepLabv3_plus      | xception65 | pascal_voc(val) |   480/520   |        -        | [88.39](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_xception_pascal_voc_segmentron.pth) |
+|     DeepLabv3_plus      | resnet101  | cityscape(val)  | (1025,2049) |        -        |      [78.27](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_resnet101_segmentron.pth)      |
+|          Danet          | resnet101  | cityscape(val)  | (1024,2048) |      79.9       |              [79.34](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/danet101_segmentron.pth)              |
+|         Pspnet          | resnet101  | cityscape(val)  | (1025,2049) |      78.63      |          [77.00](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/pspnet_resnet101_segmentron.pth)          |
+|         UPerNet         |   Swin-b   |   ade20k(val)   |  (480,480)  |      51.6       |            [49.12](https://github.com/juliantang324/SegmenTron/releases/download/v0.1.0/upernet_swinb_segmentron.pth)            |
+|       MaskFormer        |   Swin-t   |   ade20k(val)   |  (480,480)  |      48.8       |          [47.11](https://github.com/juliantang324/SegmenTron/releases/download/v0.1.0/maskformer_swint_segmentron.pth)           |
+|       MaskFormer        |   Swin-b   |   ade20k(val)   |  (480,480)  |      52.3       |          [51.87](https://github.com/juliantang324/SegmenTron/releases/download/v0.1.0/maskformer_swinb_segmentron.pth)           |
 
 ### real-time models
-Model|Backbone|Datasets|eval size|Mean IoU(paper)|Mean IoU(this repo)|FPS|
-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
-|ICnet|resnet50(0.5)|cityscape(val)|(1024,2048)|67.8|-|41.39|
-|DeepLabv3_plus|mobilenetV2|cityscape(val)|(1024,2048)|70.7|[70.3](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_mobilenetv2_segmentron.pth)|46.64|
-|BiSeNet|resnet18|cityscape(val)|(1024,2048)|-|-|39.90|
-|LEDNet|-|cityscape(val)|(1024,2048)|-|-|31.78|
-|CGNet|-|cityscape(val)|(1024,2048)|-|-|46.11|
-|HardNet|-|cityscape(val)|(1024,2048)|75.9|-|69.06|
-|DFANet|xceptionA|cityscape(val)|(1024,2048)|70.3|-|21.46|
-|HRNet|w18_small_v1|cityscape(val)|(1024,2048)|70.3|[70.5](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/hrnet_w18_small_v1_segmentron.pth)|66.01|
-|Fast_SCNN|-|cityscape(val)|(1024,2048)|68.3|[68.9](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/fast_scnn_segmentron.pth)|145.77|
+|     Model      |   Backbone    |    Datasets    |  eval size  | Mean IoU(paper) |                                                   Mean IoU(this repo)                                                   |  FPS   |
+|:--------------:|:-------------:|:--------------:|:-----------:|:---------------:|:-----------------------------------------------------------------------------------------------------------------------:|:------:|
+|     ICnet      | resnet50(0.5) | cityscape(val) | (1024,2048) |      67.8       |                                                            -                                                            | 41.39  |
+| DeepLabv3_plus |  mobilenetV2  | cityscape(val) | (1024,2048) |      70.7       | [70.3](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/deeplabv3_plus_mobilenetv2_segmentron.pth) | 46.64  |
+|    BiSeNet     |   resnet18    | cityscape(val) | (1024,2048) |        -        |                                                            -                                                            | 39.90  |
+|     LEDNet     |       -       | cityscape(val) | (1024,2048) |        -        |                                                            -                                                            | 31.78  |
+|     CGNet      |       -       | cityscape(val) | (1024,2048) |        -        |                                                            -                                                            | 46.11  |
+|    HardNet     |       -       | cityscape(val) | (1024,2048) |      75.9       |                                                            -                                                            | 69.06  |
+|     DFANet     |   xceptionA   | cityscape(val) | (1024,2048) |      70.3       |                                                            -                                                            | 21.46  |
+|     HRNet      | w18_small_v1  | cityscape(val) | (1024,2048) |      70.3       |     [70.5](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/hrnet_w18_small_v1_segmentron.pth)     | 66.01  |
+|   Fast_SCNN    |       -       | cityscape(val) | (1024,2048) |      68.3       |         [68.9](https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/fast_scnn_segmentron.pth)          | 145.77 |
 
 FPS was tested on V100.
 
@@ -74,6 +84,9 @@ pretrained backbone models will be download automatically in pytorch default dir
 CUDA_VISIBLE_DEVICES=0 python -u tools/train.py --config-file configs/cityscapes_deeplabv3_plus.yaml
 ```
 ### Train with multiple GPUs
+
+Automatic mixed precision (AMP) is turned on by default. To turn it off, add the argument `--use-amp`.
+
 ```
 CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
 ```
@@ -96,3 +109,5 @@ TEST.TEST_MODEL_PATH your_test_model_path
 - [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding)
 - [detectron2](https://github.com/facebookresearch/detectron2)
 - [gloun-cv](https://github.com/dmlc/gluon-cv)
+- [mmsegmentation](https://github.com/open-mmlab/mmsegmentation)
+- [MaskFormer](https://github.com/facebookresearch/MaskFormer)
diff --git a/configs/ade20k_maskformer_swin_b.yaml b/configs/ade20k_maskformer_swin_b.yaml
@@ -0,0 +1,30 @@
+DATASET:
+    NAME: "ade20k"
+    MEAN: [0.5, 0.5, 0.5]
+    STD: [0.5, 0.5, 0.5]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 120
+    BATCH_SIZE: 16
+    CROP_SIZE: 480
+    BASE_SIZE: 520
+    BACKBONE_PRETRAINED_KEY: "model"
+TEST:
+    BATCH_SIZE: 16
+
+SOLVER:
+    LR: 6e-5
+    DECODER_LR_FACTOR: 1.0
+    WEIGHT_DECAY: 1e-2
+    WARMUP:
+        EPOCHS: 2.37
+        FACTOR: 1e-6
+    OPTIMIZER: "adamw"
+    AUX: True
+
+MODEL:
+    MODEL_NAME: "MaskFormer"
+    BACKBONE: "swin_b_224"
+    BN_TYPE: "LN"
+    MASKFORMER:
+        IN_CHANNELS: [128, 256, 512, 1024]
diff --git a/configs/ade20k_maskformer_swin_t.yaml b/configs/ade20k_maskformer_swin_t.yaml
@@ -0,0 +1,30 @@
+DATASET:
+    NAME: "ade20k"
+    MEAN: [0.5, 0.5, 0.5]
+    STD: [0.5, 0.5, 0.5]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 120
+    BATCH_SIZE: 16
+    CROP_SIZE: 480
+    BASE_SIZE: 520
+    BACKBONE_PRETRAINED_KEY: "model"
+TEST:
+    BATCH_SIZE: 16
+
+SOLVER:
+    LR: 6e-5
+    DECODER_LR_FACTOR: 1.0
+    WEIGHT_DECAY: 1e-2
+    WARMUP:
+        EPOCHS: 2.37
+        FACTOR: 1e-6
+    OPTIMIZER: "adamw"
+    AUX: True
+
+MODEL:
+    MODEL_NAME: "MaskFormer"
+    BACKBONE: "swin_t_224"
+    BN_TYPE: "LN"
+    MASKFORMER:
+        IN_CHANNELS: [96, 192, 384, 768]
diff --git a/configs/ade20k_upernet_swin.yaml b/configs/ade20k_upernet_swin.yaml
@@ -0,0 +1,26 @@
+DATASET:
+    NAME: "ade20k"
+    MEAN: [0.5, 0.5, 0.5]
+    STD: [0.5, 0.5, 0.5]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 120
+    BATCH_SIZE: 8
+    CROP_SIZE: 480
+    BASE_SIZE: 520
+    BACKBONE_PRETRAINED_KEY: "model"
+TEST:
+    BATCH_SIZE: 8
+
+SOLVER:
+    LOSS_NAME: "dice"
+    OPTIMIZER: "adamw"
+    LR: 0.002
+
+MODEL:
+    MODEL_NAME: "UPerNet"
+    BACKBONE: "swin_t_224"
+    BN_TYPE: "LN"
+    BN_EPS_FOR_ENCODER: 1e-3
+    UPERNET:
+        IN_CHANNELS: [96, 192, 384, 768]
diff --git a/configs/cityscapes_upernet_swin.yaml b/configs/cityscapes_upernet_swin.yaml
@@ -0,0 +1,27 @@
+DATASET:
+    NAME: "cityscape"
+    MEAN: [.485, .456, .406]
+    STD: [.229, .224, .225]
+TRAIN:
+    EPOCHS: 100
+    BATCH_SIZE: 4
+    CROP_SIZE: 713
+    BACKBONE_PRETRAINED_KEY: 'model'
+TEST:
+    BATCH_SIZE: 4
+    CROP_SIZE: (1025, 2049)
+
+SOLVER:
+    LR: 0.001
+    OPTIMIZER: "adamw"
+    AUX: True
+    AUX_WEIGHT: 0.4
+
+AUG:
+    BLUR_PROB: 0.5
+
+MODEL:
+    MODEL_NAME: "UPerNet"
+    BACKBONE: "swin_b_224"
+    BN_TYPE: "LN"
+    UPERNET.IN_CHANNELS: [128, 256, 512, 1024]
diff --git a/configs/pascal_voc_maskformer_swin.yaml b/configs/pascal_voc_maskformer_swin.yaml
@@ -0,0 +1,31 @@
+DATASET:
+    NAME: "pascal_voc"
+    MEAN: [.485, .456, .406]
+    STD: [.229, .224, .225]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 50
+    BATCH_SIZE: 8
+    CROP_SIZE: 480
+    BASE_SIZE: 520
+    BACKBONE_PRETRAINED_KEY: 'model'
+
+TEST:
+    BATCH_SIZE: 8
+
+SOLVER:
+    LR: 6e-5
+    DECODER_LR_FACTOR: 1.0
+    WEIGHT_DECAY: 1e-2
+    WARMUP:
+        EPOCHS: 2.37
+        FACTOR: 1e-6
+    OPTIMIZER: "adamw"
+    AUX: True
+
+MODEL:
+    MODEL_NAME: "MaskFormer"
+    BACKBONE: "swin_t_224"
+    BN_TYPE: "LN"
+    MASKFORMER:
+        IN_CHANNELS: [96, 192, 384, 768]
diff --git a/configs/pascal_voc_pspnet_resnet.yaml b/configs/pascal_voc_pspnet_resnet.yaml
@@ -0,0 +1,26 @@
+DATASET:
+    NAME: "pascal_voc"
+    MEAN: [0.5, 0.5, 0.5]
+    STD: [0.5, 0.5, 0.5]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 200
+    BATCH_SIZE: 4
+    CROP_SIZE: 713
+TEST:
+    BATCH_SIZE: 4
+    CROP_SIZE: (1025, 2049)
+#    TEST_MODEL_PATH: trained_models/pspnet_resnet101_segmentron.pth
+
+SOLVER:
+    LR: 0.01
+    AUX: True
+    AUX_WEIGHT: 0.4
+
+AUG:
+    BLUR_PROB: 0.5
+
+MODEL:
+    MODEL_NAME: "PSPNet"
+    BACKBONE: "resnet101"
+    OUTPUT_STRIDE: 8
diff --git a/configs/pascal_voc_upernet_resnet.yaml b/configs/pascal_voc_upernet_resnet.yaml
@@ -0,0 +1,22 @@
+DATASET:
+    NAME: "pascal_voc"
+    MEAN: [0.5, 0.5, 0.5]
+    STD: [0.5, 0.5, 0.5]
+    MODE: 'val'
+TRAIN:
+    EPOCHS: 50
+    BATCH_SIZE: 8
+    CROP_SIZE: 224
+    BASE_SIZE: 520
+
+TEST:
+    BATCH_SIZE: 8
+
+SOLVER:
+    LR: 0.0001
+
+MODEL:
+    MODEL_NAME: "UPerNet"
+    UPERNET.IN_CHANNELS: [256, 512, 1024, 2048]
+    BACKBONE: "resnet101"
+    BN_EPS_FOR_ENCODER: 1e-3