Changes from all commits
20 commits
12685d1 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 26, 2021)
4da32d3 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 27, 2021)
c77a38c Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (taeil, Mar 28, 2021)
c7f62cf tsne update (oropezaev, Mar 28, 2021)
e2e8d99 Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Mar 28, 2021)
2f42971 Merge branch 'surya' of https://github.com/Berkeley-Data/SEN12MS into… (taeil, Mar 28, 2021)
6c158a9 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (taeil, Mar 30, 2021)
505122a Layer Freezing and Conv1x1 (oropezaev, Mar 31, 2021)
3626f86 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 31, 2021)
dcded08 Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Mar 31, 2021)
736af59 Output JSON File with predictions on evaluation (oropezaev, Apr 4, 2021)
f457b6e Merge pull request #14 from Berkeley-Data/surya (taeil, Apr 6, 2021)
b1f653e merge big earth net change (taeil, Apr 6, 2021)
1fae202 merge big earth net change (taeil, Apr 6, 2021)
3184f88 updated parameters (taeil, Apr 6, 2021)
565d938 Delete main_train_bu.py (oropezaev, Apr 6, 2021)
cf84255 Delete tsne_from_main_train.py (oropezaev, Apr 6, 2021)
85ec61e Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Apr 7, 2021)
ff89f2e Binary using Multilabel Framework (oropezaev, Apr 9, 2021)
f031fea Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Apr 9, 2021)
8 changes: 8 additions & 0 deletions .idea/.gitignore
8 changes: 8 additions & 0 deletions .idea/SEN12MS.iml
29 changes: 29 additions & 0 deletions .idea/deployment.xml
4 changes: 4 additions & 0 deletions .idea/encodings.xml
6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml
4 changes: 4 additions & 0 deletions .idea/misc.xml
8 changes: 8 additions & 0 deletions .idea/modules.xml
10 changes: 10 additions & 0 deletions .idea/remote-mappings.xml
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

41 changes: 23 additions & 18 deletions README.md
@@ -57,7 +57,7 @@ In this folder, other utilities that can help to load, process, or analyze the d
```
export WANDB_API_KEY=<use your API key>
export WANDB_ENTITY=cal-capstone
export WANDB_PROJECT=SEN12MS
export WANDB_PROJECT=scene_classification
#export WANDB_MODE=dryrun
```
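
These variables are picked up by `wandb` automatically at start-up; a minimal sanity-check sketch (assuming the `wandb` package is installed and the API key is valid):
```
import wandb

# wandb.init() resolves the entity, project, and API key from the
# WANDB_* environment variables exported above
run = wandb.init()
run.log({"sanity_check": 1})
run.finish()
```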

@@ -95,38 +95,43 @@ export WANDB_PROJECT=SEN12MS
CUDA_VISIBLE_DEVICES=0 python classification/main_train.py --exp_name sem12ms_baseline --data_dir /home/ubuntu/SEN12MS/data/sen12ms/data --label_split_dir /home/ubuntu/SEN12MS/splits --use_RGB --IGBP_simple --label_type multi_label --threshold 0.1 --model ResNet50 --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 1000 --epochs 1

```

#### finetune (training from pre-trained model) :anguished:

These arguments will be saved into a .txt file automatically. This .txt file can be read back during testing to restore the arguments. The `threshold` parameter is used to filter out labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads the .pkl format instead of .txt files.
- `test.py`: This Python script is used to test the model. It is a semi-automatic script that reads the argument file generated during training to decide the label type, model type, etc. However, it still requires the user to input some basic arguments, such as the path of the data directory. Here is an example of the input arguments:
- `convert_moco_to_resnet50.py`: converts MoCo models to the PyTorch ResNet-50 format

download pretrained models from `s3://sen12ms/pretrained_sup`
#### convert MoCo pretrained model for SEN12MS eval
(optional) download pretrained models from `s3://sen12ms/pretrained`

Some pretrained models:
- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p)
- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e)

```
## remove dryrun param
aws s3 sync s3://sen12ms/pretrained_sup . --dryrun
aws s3 sync s3://sen12ms/pretrained . --dryrun
```
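
For reference, a rough Python equivalent of the sync (a sketch assuming `boto3` is installed, credentials are configured, and the prefix holds fewer than 1000 objects — otherwise a paginator is needed):
```
import os
import boto3

s3 = boto3.client("s3")
resp = s3.list_objects_v2(Bucket="sen12ms", Prefix="pretrained/")
for obj in resp.get("Contents", []):
    key = obj["Key"]
    if key.endswith("/"):
        continue  # skip folder placeholder keys
    os.makedirs(os.path.dirname(key) or ".", exist_ok=True)
    s3.download_file("sen12ms", key, key)
```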

convert models
convert MoCo models to the PyTorch ResNet-50 format
```
# convert backbone to resnet50
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth
# convert local file
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -o pretrained/moco/

# download the model from W&B and convert for 12 channels
python classification/models/convert_moco_to_resnet50.py -n 12 -i hpt4/367tz8vs -o pretrained/moco/

# convert query-encoder to resnet50
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -bb false
# rename the file with a more user-friendly name (TODO: automate this)
mv pretrained/moco/367tz8vs_bb_converted.pth pretrained/moco/laced-water-61_bb_converted.pth

```
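
Internally, this kind of conversion usually boils down to renaming checkpoint keys; a hedged sketch of the idea (the `encoder_q.` prefix and checkpoint layout are assumptions — the authoritative logic lives in `convert_moco_to_resnet50.py`):
```
import torch

def convert_moco_backbone(in_path, out_path):
    ckpt = torch.load(in_path, map_location="cpu")
    state = ckpt.get("state_dict", ckpt)
    converted = {}
    for k, v in state.items():
        # keep the query encoder's backbone weights, drop the MLP head,
        # and strip the MoCo prefix so torchvision's resnet50 can load them
        if k.startswith("encoder_q.") and not k.startswith("encoder_q.fc"):
            converted[k[len("encoder_q."):]] = v
    torch.save(converted, out_path)
```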

finetune with pretrained models
- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p)
- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e)
#### finetune (training from pre-trained model) :anguished:

These arguments will be saved into a .txt file automatically. This .txt file can be read back during testing to restore the arguments. The `threshold` parameter is used to filter out labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads the .pkl format instead of .txt files.
- `test.py`: This Python script is used to test the model. It is a semi-automatic script that reads the argument file generated during training to decide the label type, model type, etc. However, it still requires the user to input some basic arguments, such as the path of the data directory. Here is an example of the input arguments:

```
CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --use_RGB --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval
CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --sensor_type s1s2 --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval
```
- `pt_name`: the name of the pretrained model (the wandb run name)
- `--eval`: remove this flag to skip evaluation after training finishes
- `sensor_type`: one of `s1`, `s2`, `s1s2`
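
The argument save/reload described above could look roughly like this (a sketch; the actual file name and format used by `main_train.py` and `test.py` may differ):
```
import argparse
import json
import os

def save_args(args, exp_dir):
    # persist the parsed arguments so test.py can re-read them later
    with open(os.path.join(exp_dir, "args.txt"), "w") as f:
        json.dump(vars(args), f, indent=2)

def load_args(exp_dir):
    with open(os.path.join(exp_dir, "args.txt")) as f:
        return argparse.Namespace(**json.load(f))
```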

Evaluate trained models for classification (only needed if you downloaded a trained model)
17 changes: 11 additions & 6 deletions classification/dataset.py
@@ -58,13 +58,14 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD)
else:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD_BIGEARTHNET)
# load only RGB

# load only RGB
if use_RGB and use_s2==False:
if not for_bigearthnet:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB)
else:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB_BIGEARTHNET)

# load s1 data
if use_s1:
if use_s2 or use_RGB:
@@ -89,7 +90,7 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use
else:
loc = np.argmax(lc, axis=-1)
lc_hot = np.zeros_like(lc).astype(np.float32)
lc_hot[loc] = 1
lc_hot[loc] = lc[0]

rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]}

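For context, a tiny worked example of the single-label branch above (the three-class score vector is made up):
```
import numpy as np

lc = np.array([0.1, 0.7, 0.2], dtype=np.float32)  # per-class scores
loc = np.argmax(lc, axis=-1)                       # -> 1
lc_hot = np.zeros_like(lc).astype(np.float32)
lc_hot[loc] = 1       # old behaviour: one-hot -> [0., 1., 0.]
lc_hot[loc] = lc[0]   # new behaviour: lc[0]   -> [0., 0.1, 0.]
```
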
@@ -352,7 +353,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
if CLC_s == True:
self.n_classes = 19
else:
self.n_classes = 43
self.n_classes = 1
# self.n_classes = 43

# make sure parent dir exists
assert os.path.exists(path)
@@ -364,7 +366,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
sample_list = None
total_sample_size = 0
if subset == "train" or subset == "val":
file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl')
# file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl')
file = os.path.join(ls_dir, f'BigEarthNet_train_balanced_Permanently_irrigated_land_{data_size}.pkl')
print("BigEarthNet: Loading file ",file)
sample_list = pkl.load(open(file, "rb"))
total_sample_size = len(sample_list)
@@ -384,6 +387,7 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
else:
pbar = tqdm(total=125866) # 125866 samples in test set
file = os.path.join(ls_dir, 'bigearthnet_test.pkl')

sample_list = pkl.load(open(file, "rb"))
print("bigearthnet_test should be 125866:", len(sample_list))

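The split lists are pickled sample lists; a minimal way to inspect one (the path is an assumption, relative to the repo's `splits` directory):
```
import pickle as pkl

with open("splits/bigearthnet_test.pkl", "rb") as f:
    sample_list = pkl.load(f)

print(len(sample_list))  # the test split should contain 125866 samples
```
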
@@ -407,7 +411,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
"samples from the bigearthnet subset", subset)

# import labels as a dictionary
label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl')
# label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl')
label_file = os.path.join(ls_dir, 'BigEarthNet_binary_labels_Permanently_irrigated_land.pkl')

a_file = open(label_file, "rb")
self.labels = pkl.load(a_file)