Changes from all commits
20 commits
12685d1 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 26, 2021)
4da32d3 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 27, 2021)
c77a38c Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (taeil, Mar 28, 2021)
c7f62cf tsne update (oropezaev, Mar 28, 2021)
e2e8d99 Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Mar 28, 2021)
2f42971 Merge branch 'surya' of https://github.com/Berkeley-Data/SEN12MS into… (taeil, Mar 28, 2021)
6c158a9 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (taeil, Mar 30, 2021)
505122a Layer Freezing and Conv1x1 (oropezaev, Mar 31, 2021)
3626f86 Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Mar 31, 2021)
dcded08 Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Mar 31, 2021)
736af59 Output JSON File with predictions on evaluation (oropezaev, Apr 4, 2021)
f457b6e Merge pull request #14 from Berkeley-Data/surya (taeil, Apr 6, 2021)
b1f653e merge big earth net change (taeil, Apr 6, 2021)
1fae202 merge big earth net change (taeil, Apr 6, 2021)
3184f88 updated parameters (taeil, Apr 6, 2021)
565d938 Delete main_train_bu.py (oropezaev, Apr 6, 2021)
cf84255 Delete tsne_from_main_train.py (oropezaev, Apr 6, 2021)
85ec61e Merge branch 'taeil' of https://github.com/Berkeley-Data/SEN12MS into… (oropezaev, Apr 7, 2021)
ff89f2e Binary using Multilabel Framework (oropezaev, Apr 9, 2021)
f031fea Merge branch 'ernesto' of https://github.com/Berkeley-Data/SEN12MS in… (oropezaev, Apr 9, 2021)
8 changes: 8 additions & 0 deletions .idea/.gitignore
8 changes: 8 additions & 0 deletions .idea/SEN12MS.iml
29 changes: 29 additions & 0 deletions .idea/deployment.xml
4 changes: 4 additions & 0 deletions .idea/encodings.xml
6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml
4 changes: 4 additions & 0 deletions .idea/misc.xml
8 changes: 8 additions & 0 deletions .idea/modules.xml
10 changes: 10 additions & 0 deletions .idea/remote-mappings.xml
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

41 changes: 23 additions & 18 deletions README.md
@@ -57,7 +57,7 @@ In this folder, other utilities that can help to load, process, or analyze the d
```
export WANDB_API_KEY=<use your API key>
export WANDB_ENTITY=cal-capstone
export WANDB_PROJECT=SEN12MS
export WANDB_PROJECT=scene_classification
#export WANDB_MODE=dryrun
```
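
These variables are picked up by `wandb` automatically at start-up; a minimal sanity-check sketch (assuming the `wandb` package is installed and the API key is valid):
```
import wandb

# wandb.init() resolves the entity, project, and API key from the
# WANDB_* environment variables exported above
run = wandb.init()
run.log({"sanity_check": 1})
run.finish()
```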

@@ -95,38 +95,43 @@ export WANDB_PROJECT=SEN12MS
CUDA_VISIBLE_DEVICES=0 python classification/main_train.py --exp_name sem12ms_baseline --data_dir /home/ubuntu/SEN12MS/data/sen12ms/data --label_split_dir /home/ubuntu/SEN12MS/splits --use_RGB --IGBP_simple --label_type multi_label --threshold 0.1 --model ResNet50 --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 1000 --epochs 1

```

#### finetune (training from pre-trained model) :anguished:

These arguments will be saved into a .txt file automatically. This .txt file can be read back during testing to restore the arguments. The `threshold` parameter is used to filter out labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads the .pkl format instead of .txt files.
- `test.py`: This Python script is used to test the model. It is a semi-automatic script that reads the argument file generated during training to decide the label type, model type, etc. However, it still requires the user to input some basic arguments, such as the path of the data directory. Here is an example of the input arguments:
- `convert_moco_to_resnet50.py`: converts MoCo models to the PyTorch ResNet-50 format

download pretrained models from `s3://sen12ms/pretrained_sup`
#### convert MoCo pretrained model for SEN12MS eval
(optional) download pretrained models from `s3://sen12ms/pretrained`

Some pretrained models:
- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p)
- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e)

```
## remove dryrun param
aws s3 sync s3://sen12ms/pretrained_sup . --dryrun
aws s3 sync s3://sen12ms/pretrained . --dryrun
```
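
For reference, a rough Python equivalent of the sync (a sketch assuming `boto3` is installed, credentials are configured, and the prefix holds fewer than 1000 objects — otherwise a paginator is needed):
```
import os
import boto3

s3 = boto3.client("s3")
resp = s3.list_objects_v2(Bucket="sen12ms", Prefix="pretrained/")
for obj in resp.get("Contents", []):
    key = obj["Key"]
    if key.endswith("/"):
        continue  # skip folder placeholder keys
    os.makedirs(os.path.dirname(key) or ".", exist_ok=True)
    s3.download_file("sen12ms", key, key)
```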

convert models
convert MoCo models to the PyTorch ResNet-50 format
```
# convert backbone to resnet50
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth
# convert local file
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -o pretrained/moco/

# download the model from W&B and convert for 12 channels
python classification/models/convert_moco_to_resnet50.py -n 12 -i hpt4/367tz8vs -o pretrained/moco/

# convert query-encoder to resnet50
python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -bb false
# rename the file with a more user-friendly name (TODO: automate this)
mv pretrained/moco/367tz8vs_bb_converted.pth pretrained/moco/laced-water-61_bb_converted.pth

```
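
Internally, this kind of conversion usually boils down to renaming checkpoint keys; a hedged sketch of the idea (the `encoder_q.` prefix and checkpoint layout are assumptions — the authoritative logic lives in `convert_moco_to_resnet50.py`):
```
import torch

def convert_moco_backbone(in_path, out_path):
    ckpt = torch.load(in_path, map_location="cpu")
    state = ckpt.get("state_dict", ckpt)
    converted = {}
    for k, v in state.items():
        # keep the query encoder's backbone weights, drop the MLP head,
        # and strip the MoCo prefix so torchvision's resnet50 can load them
        if k.startswith("encoder_q.") and not k.startswith("encoder_q.fc"):
            converted[k[len("encoder_q."):]] = v
    torch.save(converted, out_path)
```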

finetune with pretrained models
- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p)
- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e)
#### finetune (training from pre-trained model) :anguished:

These arguments will be saved into a .txt file automatically. This .txt file can be read back during testing to restore the arguments. The `threshold` parameter is used to filter out labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads the .pkl format instead of .txt files.
- `test.py`: This Python script is used to test the model. It is a semi-automatic script that reads the argument file generated during training to decide the label type, model type, etc. However, it still requires the user to input some basic arguments, such as the path of the data directory. Here is an example of the input arguments:

```
CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --use_RGB --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval
CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --sensor_type s1s2 --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval
```
- `pt_name`: the name of the pretrained model (the wandb run name)
- `--eval`: remove this flag to skip evaluation after training finishes
- `sensor_type`: one of `s1`, `s2`, `s1s2`
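
The argument save/reload described above could look roughly like this (a sketch; the actual file name and format used by `main_train.py` and `test.py` may differ):
```
import argparse
import json
import os

def save_args(args, exp_dir):
    # persist the parsed arguments so test.py can re-read them later
    with open(os.path.join(exp_dir, "args.txt"), "w") as f:
        json.dump(vars(args), f, indent=2)

def load_args(exp_dir):
    with open(os.path.join(exp_dir, "args.txt")) as f:
        return argparse.Namespace(**json.load(f))
```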

Evaluate trained models for classification (only needed if you downloaded a trained model)
17 changes: 11 additions & 6 deletions classification/dataset.py
@@ -58,13 +58,14 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD)
else:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD_BIGEARTHNET)
# load only RGB

# load only RGB
if use_RGB and use_s2==False:
if not for_bigearthnet:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB)
else:
img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB_BIGEARTHNET)

# load s1 data
if use_s1:
if use_s2 or use_RGB:
@@ -89,7 +90,7 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use
else:
loc = np.argmax(lc, axis=-1)
lc_hot = np.zeros_like(lc).astype(np.float32)
lc_hot[loc] = 1
lc_hot[loc] = lc[0]

rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]}

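For context, a tiny worked example of the single-label branch above (the three-class score vector is made up):
```
import numpy as np

lc = np.array([0.1, 0.7, 0.2], dtype=np.float32)  # per-class scores
loc = np.argmax(lc, axis=-1)                       # -> 1
lc_hot = np.zeros_like(lc).astype(np.float32)
lc_hot[loc] = 1       # old behaviour: one-hot -> [0., 1., 0.]
lc_hot[loc] = lc[0]   # new behaviour: lc[0]   -> [0., 0.1, 0.]
```
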
@@ -352,7 +353,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
if CLC_s == True:
self.n_classes = 19
else:
self.n_classes = 43
self.n_classes = 1
# self.n_classes = 43

# make sure parent dir exists
assert os.path.exists(path)
@@ -364,7 +366,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
sample_list = None
total_sample_size = 0
if subset == "train" or subset == "val":
file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl')
# file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl')
file = os.path.join(ls_dir, f'BigEarthNet_train_balanced_Permanently_irrigated_land_{data_size}.pkl')
print("BigEarthNet: Loading file ",file)
sample_list = pkl.load(open(file, "rb"))
total_sample_size = len(sample_list)
@@ -384,6 +387,7 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
else:
pbar = tqdm(total=125866) # 125866 samples in test set
file = os.path.join(ls_dir, 'bigearthnet_test.pkl')

sample_list = pkl.load(open(file, "rb"))
print("bigearthnet_test should be 125866:", len(sample_list))

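The split lists are pickled sample lists; a minimal way to inspect one (the path is an assumption, relative to the repo's `splits` directory):
```
import pickle as pkl

with open("splits/bigearthnet_test.pkl", "rb") as f:
    sample_list = pkl.load(f)

print(len(sample_list))  # the test split should contain 125866 samples
```
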
@@ -407,7 +411,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None,
"samples from the bigearthnet subset", subset)

# import labels as a dictionary
label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl')
# label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl')
label_file = os.path.join(ls_dir, 'BigEarthNet_binary_labels_Permanently_irrigated_land.pkl')

a_file = open(label_file, "rb")
self.labels = pkl.load(a_file)