Feature/increase colmap accuracy #19

Open · wants to merge 50 commits into `main`
Changes from all commits · 50 commits
0db6ab0
add Dockerfile and confirm docker build successfully finished.
MasahiroOgawa Jun 3, 2025
f215ce8
add docker compose file and confirmed it works well
MasahiroOgawa Jun 3, 2025
db084af
fix asset file not found error
MasahiroOgawa Jun 3, 2025
a3f9001
update readme to use docker compose
MasahiroOgawa Jun 3, 2025
55fbbde
add command line EDGS python func, and let Dockerfile choose grad…
MasahiroOgawa Jun 3, 2025
56aed12
fix to be runnable
MasahiroOgawa Jun 3, 2025
3fd1a7c
debug fit_model_to_scene_full.py by creating util functions.
MasahiroOgawa Jun 3, 2025
9fc32c2
fix Namespace object has no attribute num_ref_views error
MasahiroOgawa Jun 4, 2025
6d7a5c0
fix error comes from wrong argument name gradio_obj
MasahiroOgawa Jun 6, 2025
c8f1af8
move script files to script/
MasahiroOgawa Jun 6, 2025
cb17a25
fix import in gradio_demo.py
MasahiroOgawa Jun 6, 2025
6f45cef
fix process colmap error
MasahiroOgawa Jun 6, 2025
fa2feac
fix relative directory in gradio_demo.py
MasahiroOgawa Jun 6, 2025
88b356a
debug gradio_demo to pass an accessible copied result.
MasahiroOgawa Jun 6, 2025
4b6bade
fix process_input() to process_input_for_colmap()
MasahiroOgawa Jun 6, 2025
dfd3bb4
fix using abs path to be able to run fit_model_to_scene_full.py scrip…
MasahiroOgawa Jun 6, 2025
ebf2590
allow opening Jupyter notebooks from Docker
MasahiroOgawa Jun 10, 2025
221d8b9
allow running options A, B, C in the README
MasahiroOgawa Jun 10, 2025
d66a1ba
update config to change name
MasahiroOgawa Jun 17, 2025
5eac9d5
update README to run option A
MasahiroOgawa Jun 17, 2025
de7b415
revert config/train
MasahiroOgawa Jun 17, 2025
b120ed9
fix readme optionC
MasahiroOgawa Jun 17, 2025
c08c9e4
disable wandb if config/train.yaml specifies it.
MasahiroOgawa Jun 17, 2025
ca39f9b
fix docker-compose config to configs
MasahiroOgawa Jun 18, 2025
cd4c30e
fix No such file or directory error for output models
MasahiroOgawa Jun 18, 2025
f64d37f
fix could not recognize scene type error, by adding source_path for …
MasahiroOgawa Jun 18, 2025
6fa2dde
fix You must call wandb.init() before wandb.log error by setting logw…
MasahiroOgawa Jun 18, 2025
94e9db7
fix no cuda gpus are available error
MasahiroOgawa Jun 18, 2025
16526eb
remove wandb edit part from readme
MasahiroOgawa Jun 19, 2025
1019c23
remove unnecessary parameter change inside python file.
MasahiroOgawa Jun 19, 2025
da1317d
set iterations in fit_model_to_scene_full.py similar to gradio_demo
MasahiroOgawa Jun 19, 2025
9ab2a76
add option to use the same settings as the gradio demo
MasahiroOgawa Jun 19, 2025
d965e3e
fix CondaToSNonInteractiveError caused by latest conda update
MasahiroOgawa Jul 16, 2025
ec44720
fix train.py config path
MasahiroOgawa Jul 16, 2025
6849622
fix option D in readme.
MasahiroOgawa Jul 24, 2025
f3f2ff1
fix memory overflow error
MasahiroOgawa Jul 25, 2025
9f3884d
fix memory error. and fix pinhole enforcement.
MasahiroOgawa Jul 25, 2025
20f25d8
adjust colmap parameters similar to automatic reconstructor
MasahiroOgawa Jul 28, 2025
c5799e2
fix colmap no attribute error
MasahiroOgawa Jul 28, 2025
05864f5
let fit_model_...py accept colmap result
MasahiroOgawa Jul 28, 2025
4616b1d
change train.yaml parameters to match the readme.
MasahiroOgawa Jul 28, 2025
9da88e5
add low memory mode.
MasahiroOgawa Jul 29, 2025
26fa4dc
separate colmap output and edgs output
MasahiroOgawa Jul 29, 2025
e14ab91
update README to be concise.
MasahiroOgawa Jul 29, 2025
8252dea
fix fit_model..py to extract 1fps images.
MasahiroOgawa Jul 29, 2025
f4b6198
add target_fps argument to fit_model...py and merge colmap models if …
MasahiroOgawa Jul 30, 2025
ade9d4f
Force single reconstruction
MasahiroOgawa Jul 30, 2025
64ec249
fix for high resolution 3000x3000 video.
MasahiroOgawa Jul 30, 2025
80c3e3c
update README to add latest update
MasahiroOgawa Jul 30, 2025
cf0d50a
add ffmpeg script
MasahiroOgawa Jul 31, 2025
26 changes: 26 additions & 0 deletions .dockerignore
@@ -0,0 +1,26 @@
# Ignore the following files and directories when building the Docker image
*.pyc
__pycache__/
*.ipynb_checkpoints
*.log
*.csv
*.tsv
*.h5
*.pth
*.pt
*.zip
*.tar.gz
*.egg-info/
dist/
build/
.env
venv/
.env.local
*.DS_Store
*.egg
*.whl
*.pkl
*.json
*.yaml
*.yml
submodules/
3 changes: 3 additions & 0 deletions .gitignore
@@ -158,3 +158,6 @@ dmypy.json
# Pyre type checker
.pyre/
learnableearthparser/fast_sampler/_sampler.c

# data
data/
60 changes: 60 additions & 0 deletions Dockerfile
@@ -0,0 +1,60 @@
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Set the working directory
WORKDIR /EDGS

# Install system dependencies first, including git, build-essential, and cmake
RUN apt-get update && apt-get install -y \
git \
wget \
build-essential \
cmake \
ninja-build \
libgl1-mesa-glx \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*

# Copy only essential files for cloning submodules first (e.g., .gitmodules)
# Or, if submodules are public, you might not need to copy anything specific for this step
# For simplicity, we'll copy everything, but this could be optimized
COPY . .

# Initialize and update submodules
RUN git submodule init && git submodule update --recursive

# Install Miniconda
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
bash /tmp/miniconda.sh -b -p /opt/conda && \
rm /tmp/miniconda.sh
ENV PATH="/opt/conda/bin:${PATH}"

# Create the conda environment and install dependencies
# Accept Anaconda TOS before using conda
RUN conda init bash && \
conda config --set always_yes yes --set changeps1 no && \
conda config --add channels defaults && \
conda config --set channel_priority strict && \
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
# Now you can safely create your environment
RUN conda create -y -n edgs python=3.10 pip && \
conda clean -afy && \
echo "source activate edgs" > ~/.bashrc

# Set CUDA architectures to compile for
ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"

# Activate the environment and install Python dependencies
RUN /bin/bash -c "source activate edgs && \
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
pip install -e ./submodules/gaussian-splatting/submodules/diff-gaussian-rasterization && \
pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \
pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \
pip install -e ./submodules/RoMa && \
pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d jupyterlab matplotlib"

# Expose the port for Gradio
EXPOSE 7862

# Keep the container running in detached mode
CMD ["tail", "-f", "/dev/null"]
156 changes: 122 additions & 34 deletions README.md
@@ -39,7 +39,7 @@ Renderings become <strong>nearly indistinguishable from ground truth after only
- [🚀 Quickstart](#sec-quickstart)
- [🛠️ Installation](#sec-install)
- [📦 Data](#sec-data)

- [🎬 Video Processing Improvements](#video-processing-improvements)
- [🏋️ Training](#sec-training)
- [🏗️ Reusing Our Model](#sec-reuse)
- [📄 Citation](#sec-citation)
@@ -69,55 +69,121 @@ Alternatively, check our [Colab notebook](https://colab.research.google.com/gith
<a id="sec-install"></a>
## 🛠️ Installation

You can either run `install.sh` or manually install using the following:
You can simply install it with:

```bash
git clone git@github.com:CompVis/EDGS.git --recursive
cd EDGS
git submodule update --init --recursive
docker compose up -d
```

conda create -y -n edgs python=3.10 pip
conda activate edgs
Alternatively, you can install by running `script/install.sh`.

# Set up path to your CUDA. In our experience similar versions like 12.2 also work well
export CUDA_HOME=/usr/local/cuda-12.1
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
export PATH=$CUDA_HOME/bin:$PATH
<a id="sec-data"></a>
## 📦 Data

conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia -y
conda install nvidia/label/cuda-12.1.0::cuda-toolkit -y
We evaluated on the following datasets:

pip install -e submodules/gaussian-splatting/submodules/diff-gaussian-rasterization
pip install -e submodules/gaussian-splatting/submodules/simple-knn
- **MipNeRF360** — download [here](https://jonbarron.info/mipnerf360/). Unzip "Dataset Pt. 1" and "Dataset Pt. 2", then merge scenes.
- **Tanks & Temples + Deep Blending** — from the [original 3DGS repo](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/datasets/input/tandt_db.zip).

### Using Your Own Dataset

# For COLMAP and pycolmap
# Optionally install original colmap but probably pycolmap suffices
# conda install conda-forge/label/colmap_dev::colmap
pip install pycolmap
#### Option A
Use the Gradio demo.
After running `docker compose up -d`,
```bash
docker compose exec edgs-app bash
python script/gradio_demo.py --port 7862
```

If the compose file maps port 7862 (the port the Dockerfile exposes), the demo will be reachable at `http://localhost:7862` on the host.

#### Option B
From the command line.
```bash
docker compose exec edgs-app bash
python script/fit_model_to_scene_full.py --video_path <your mp4 video> [--output_dir <EDGS output directory>]
```

pip install wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg
conda install numpy=1.26.4 -y -c conda-forge --override-channels
> **🔧 Enhanced Video Processing**: The video processing pipeline now includes improved frame extraction with ffmpeg support, automatic handling of problematic video formats, and optimized COLMAP settings to ensure single unified reconstructions instead of fragmented models.

pip install -e submodules/RoMa
conda install anaconda::jupyter --yes
**Additional features:**

# Stuff necessary for gradio and visualizations
pip install gradio
pip install plotly scikit-learn moviepy==2.1.1 ffmpeg
pip install open3d
1. **Skip COLMAP reconstruction** - Use existing COLMAP results to save time:
```bash
python script/fit_model_to_scene_full.py --colmap_scene_dir <path_to_colmap_scene>
```

<a id="sec-data"></a>
## 📦 Data
2. **Separate output directory** - Keep COLMAP input and EDGS output separate:
```bash
python script/fit_model_to_scene_full.py \
--colmap_scene_dir <colmap_scene> \
--output_dir <edgs_output_dir>
```

We evaluated on the following datasets:
3. **Memory-efficient configurations** - For GPUs with limited memory:
```bash
# Low memory mode (recommended for 12GB GPUs)
python script/fit_model_to_scene_full.py --video_path <video> --config train_low_memory

- **MipNeRF360** — download [here](https://jonbarron.info/mipnerf360/). Unzip "Dataset Pt. 1" and "Dataset Pt. 2", then merge scenes.
- **Tanks & Temples + Deep Blending** — from the [original 3DGS repo](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/datasets/input/tandt_db.zip).
# Very low memory mode (for 8GB GPUs or very large scenes)
python script/fit_model_to_scene_full.py --video_path <video> --config train_very_low_memory
```

### Using Your Own Dataset
4. **Frame extraction control** - Control the video sampling rate for reconstruction quality (a sketch of the sampling logic follows this list):
```bash
# Extract frames at 3 fps (default, good balance)
python script/fit_model_to_scene_full.py --video_path <video> --target_fps 3.0

# Higher density extraction for complex scenes (4-5 fps)
python script/fit_model_to_scene_full.py --video_path <video> --target_fps 5.0

# Lower density for long videos or memory constraints (1-2 fps)
python script/fit_model_to_scene_full.py --video_path <video> --target_fps 1.5
```
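The `--target_fps` logic boils down to keeping roughly one frame per `1/target_fps` seconds of video. A minimal sketch of the idea, using OpenCV as in the fallback extraction path (the function name `sample_frames` and the output naming are illustrative, not the script's actual API):

```python
import os

import cv2  # OpenCV, standing in for the fallback extraction path


def sample_frames(video_path: str, out_dir: str, target_fps: float = 3.0) -> int:
    """Keep roughly `target_fps` frames per second of video; returns frames kept."""
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # guard against broken metadata
    stride = max(1, round(video_fps / target_fps))  # 30 fps / 3.0 -> every 10th frame
    kept = idx = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % stride == 0:
            cv2.imwrite(os.path.join(out_dir, f"{kept:05d}.jpg"), frame)
            kept += 1
        idx += 1
    cap.release()
    return kept
```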

**Examples:**
```bash
# Process new video with low memory settings and custom frame rate
python script/fit_model_to_scene_full.py \
--video_path data/my_video.mov \
--config train_low_memory \
--target_fps 3.0 \
--output_dir outputs/my_video_edgs

# High-quality processing for complex scenes
python script/fit_model_to_scene_full.py \
--video_path data/forest_scene.mp4 \
--config train_low_memory \
--target_fps 4.0 \
--output_dir outputs/forest_high_quality

# Process problematic/4K videos with conservative settings
python script/fit_model_to_scene_full.py \
--video_path data/4k_drone_video.MP4 \
--config train_very_low_memory \
--target_fps 2.0 \
--output_dir outputs/drone_4k

# Use existing COLMAP scene with custom output location
python script/fit_model_to_scene_full.py \
--colmap_scene_dir outputs/my_colmap_scene \
--config train_low_memory \
--output_dir outputs/my_edgs_results
```

#### Option C
Use JupyterLab.
```bash
docker compose exec edgs-app bash
```
Then, in a terminal inside the Docker container, run:
```bash
jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=notebooks
```
After JupyterLab starts, it prints URLs to the terminal. Look for one containing a token, like:
`http://127.0.0.1:8888/lab?token=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
Open `http://localhost:8888` (or `http://127.0.0.1:8888`) in your host browser. When prompted for a "Password or token", paste the token part of that URL and log in. Alternatively, paste the full tokenized URL directly into your browser.

#### Option D
You can use the same data format as the [3DGS project](https://github.com/graphdeco-inria/gaussian-splatting?tab=readme-ov-file#processing-your-own-scenes). Please follow their guide to prepare your scene.

Expected folder structure:
@@ -134,17 +200,39 @@ scene_folder
|---points3D.bin
```
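Before training on your own data, you can sanity-check that a scene folder is readable. A quick check with pycolmap (not one of the repo's scripts; it assumes the sparse model sits under `sparse/0`, as in the 3DGS layout):

```python
import pycolmap

# Assumption: the sparse model lives at <scene_folder>/sparse/0
rec = pycolmap.Reconstruction("scene_folder/sparse/0")
print(f"{len(rec.images)} registered images, {len(rec.points3D)} 3D points")
```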

```bash
docker compose exec edgs-app bash
```
Then run the training command described in the [🏋️ Training](#sec-training) section below.

The NeRF synthetic format is also accepted.

You can also use functions provided in our code to convert a collection of images or a single video into the desired format. However, this may require tweaking, and processing time can be long for large collections of images with little overlap.

## 🎬 Video Processing Improvements

Recent enhancements to the video processing pipeline provide better handling of various video formats and improved reconstruction quality:

### **Key Features:**
- **Robust Frame Extraction**: Uses ffmpeg with an OpenCV fallback for reliable processing of problematic video formats (see the extraction sketch after this list)
- **Single Reconstruction Guarantee**: Optimized COLMAP settings prevent fragmented reconstructions (multiple sparse/0, sparse/1, etc.); a sketch of the relevant mapper options follows the best-practices list
- **Configurable Frame Density**: Control extraction rate with `--target_fps` parameter for quality vs. performance trade-offs
- **High-Resolution Support**: Automatic handling of 4K and high-resolution videos with memory management
- **Corrupted Video Recovery**: Handles videos with incorrect metadata or codec issues
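A minimal sketch of the ffmpeg-first strategy (the helper below and its flags are assumptions, not the repo's exact extraction code): ffmpeg resamples the video to the requested rate, and a `False` return tells the caller to fall back to OpenCV.

```python
import shutil
import subprocess


def extract_with_ffmpeg(video_path: str, out_dir: str, target_fps: float) -> bool:
    """Try ffmpeg first; return False so the caller can fall back to OpenCV."""
    if shutil.which("ffmpeg") is None:
        return False  # no ffmpeg on PATH -> use the OpenCV fallback
    cmd = [
        "ffmpeg", "-y", "-i", video_path,
        "-vf", f"fps={target_fps}",  # resample to the requested frame rate
        "-q:v", "2",                 # high JPEG quality
        f"{out_dir}/%05d.jpg",
    ]
    return subprocess.run(cmd, capture_output=True).returncode == 0
```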

### **Best Practices:**
- **Forest/Complex Scenes**: Use `--target_fps 4.0` for better overlap
- **Long Videos**: Use `--target_fps 1.5` to limit frame count
- **4K/High-Res Videos**: Use `--config train_very_low_memory --target_fps 2.0`
- **Standard Processing**: Default `--target_fps 3.0` works well for most cases
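The key COLMAP knob behind the single-reconstruction behavior is the mapper's `multiple_models` flag. A hedged pycolmap sketch (option names follow COLMAP's `Mapper.*` flags but can vary between pycolmap versions, and all paths are placeholders):

```python
import pycolmap

# Assumption: feature extraction and matching already populated database.db.
opts = pycolmap.IncrementalPipelineOptions()
opts.multiple_models = False  # force one model instead of sparse/0, sparse/1, ...

maps = pycolmap.incremental_mapping(
    database_path="outputs/scene/database.db",  # placeholder paths
    image_path="outputs/scene/images",
    output_path="outputs/scene/sparse",
    options=opts,
)
print(f"reconstructed {len(maps)} model(s)")
```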

<a id="sec-training"></a>
## 🏋️ Training


To optimize a single scene in COLMAP format, run:
```bash
python train.py \
python script/train.py \
train.gs_epochs=30000 \
train.no_densify=True \
gs.dataset.source_path=<scene folder> \
20 changes: 9 additions & 11 deletions configs/train.yaml
@@ -1,22 +1,22 @@
defaults:
- gs: base
- _self_
- _self_

seed: 228

wandb:
mode: "online" # "disabled" for no logging
mode: "disabled" # "online" or "disabled"
entity: "3dcorrespondence"
project: "Adv3DGS"
group: null
name: null
tag: "debug"

train:
gs_epochs: 0 # number of 3dgs iterations
reduce_opacity: True
gs_epochs: 30000 # number of 3dgs iterations
reduce_opacity: True
no_densify: False # if True, the model will not be densified
max_lr: True
max_lr: True

load:
gs: null #path to 3dgs checkpoint
@@ -27,12 +27,10 @@ verbose: true

init_wC:
use: True # use EDGS
matches_per_ref: 15_000 # number of matches per reference
num_refs: 180 # number of reference images
matches_per_ref: 20000 # number of matches per reference
num_refs: 360 #180 # number of reference images
nns_per_ref: 3 # number of nearest neighbors per reference
scaling_factor: 0.001
proj_err_tolerance: 0.01
roma_model: "outdoors" # you can change this to "indoors" or "outdoors"
add_SfM_init : False


add_SfM_init: False
31 changes: 31 additions & 0 deletions configs/train_low_memory.yaml
@@ -0,0 +1,31 @@
defaults:
- train
- _self_

# Low memory configuration for EDGS
# This config reduces memory usage while maintaining quality

# Disable densification to avoid memory spikes
train:
no_densify: True # EDGS works well without densification
gs_epochs: 60000 # Keep full training iterations for quality

# Override Gaussian Splatting optimization settings for lower memory
gs:
opt:
# Reduce batch size to use less memory per iteration
batch_size: 16 # Reduced from 64

# Increase opacity reset interval to avoid frequent memory allocations
opacity_reset_interval: 1_000_000 # Effectively disable opacity reset

# Keep densification settings but they won't be used due to no_densify
densify_from_iter: 500
densify_until_iter: 15000
densify_grad_threshold: 0.0002

# Keep high quality initialization settings
init_wC:
matches_per_ref: 15000 # Slightly reduced from 20000 for memory
num_refs: 180 # Reduced from 360 but still high quality
nns_per_ref: 3 # Keep nearest neighbors for good initialization
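
Because `train_low_memory.yaml` layers on top of `train.yaml` through Hydra's `defaults` list, the merged configuration can be inspected without launching training. A small sketch using Hydra's compose API (assumes Hydra >= 1.2, run from the repository root):

```python
from hydra import compose, initialize

# Assumption: run from the repo root, where configs/ holds both yaml files.
with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train_low_memory")

print(cfg.train.no_densify)         # True   (overridden here)
print(cfg.train.gs_epochs)          # 60000  (overridden here)
print(cfg.init_wC.matches_per_ref)  # 15000  (overridden here)
print(cfg.wandb.mode)               # "disabled" (inherited from train.yaml)
```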