Skip to content

Commit cce913e

Browse files
thammegowdaemjotde
authored and committed
Merged PR 35883: pymarian: manylinux whl builder; bind main() func
* pymarian: manylinux whl builder; * bind main() function. Upon pip install, a "pymarian" is made available which has same functionality as "marian" CLI. * fix github CI and devops CI with recent changes to cmake build for multiple python versions This PR was originally on public fork * #1029 * #1028
1 parent 5c587cc commit cce913e

File tree

15 files changed

+174
-61
lines changed

15 files changed

+174
-61
lines changed

.github/workflows/macos.yml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@ on:
88

99
jobs:
1010
build-macos:
11-
name: MacOS CPU-only
11+
strategy:
12+
matrix:
13+
include:
14+
- name: "pymarian=false"
15+
pymarian: false
16+
name: "MacOS CPU-only ${{ matrix.name }}"
1217
runs-on: macos-12
1318

1419
steps:
@@ -33,7 +38,8 @@ jobs:
3338
-DCOMPILE_SERVER=off \
3439
-DCOMPILE_TESTS=on \
3540
-DUSE_FBGEMM=on \
36-
-DUSE_SENTENCEPIECE=on
41+
-DUSE_SENTENCEPIECE=on \
42+
-DPYMARIAN=${{matrix.pymarian}}
3743
3844
- name: Compile
3945
working-directory: build
@@ -52,8 +58,12 @@ jobs:
5258
ls -hlv $(find . -maxdepth 1 -type f -perm +ugo+x \( -name "marian*" -o -name "spm*" \))
5359
5460
- name: Install PyMarian
61+
working-directory: build
62+
if: matrix.pymarian == true
5563
run: |
64+
echo "Wheels built: " && ls -lh pymarian*.whl
5665
python3 -m pip install --upgrade pip setuptools wheel pytest
57-
CMAKE_ARGS="" python3 -m pip install -v .
58-
python3 -m pymarian -v
59-
MARIAN_QUIET=YES python3 -m pytest -vs src/python/tests
66+
python3 -m pip install -v pymarian*.whl
67+
python3 -m pymarian --version
68+
pymarian-eval --version
69+
pymarian --version

.github/workflows/ubuntu.yml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
gpu: false
2222
unit_tests: true
2323
examples: false
24+
pymarian: true
2425
# Using Clang compiler
2526
- name: "Ubuntu CPU-only clang-14"
2627
os: ubuntu-22.04
@@ -31,6 +32,7 @@ jobs:
3132
gpu: false
3233
unit_tests: true
3334
examples: false
35+
pymarian: true
3436
# Ubuntu GPU-only build
3537
- name: "Ubuntu GPU-only"
3638
os: ubuntu-20.04
@@ -41,6 +43,7 @@ jobs:
4143
gpu: true
4244
unit_tests: false
4345
examples: true
46+
pymarian: true
4447
# Ubuntu 22.04 supports CUDA 11.7
4548
# Unit tests and examples are not compiled to save disk space
4649
- name: "Ubuntu 22.04 CUDA 11.7 gcc-11"
@@ -52,6 +55,7 @@ jobs:
5255
gpu: true
5356
unit_tests: false
5457
examples: false
58+
pymarian: true
5559
# Ubuntu 20.04 supports CUDA 11+
5660
# Unit tests and examples are not compiled to save disk space
5761
- name: "Ubuntu 20.04 CUDA 11.1 gcc-9"
@@ -63,6 +67,7 @@ jobs:
6367
gpu: true
6468
unit_tests: false
6569
examples: false
70+
pymarian: true
6671
# Ubuntu 18.04 supports CUDA 10.1+
6772
# But it will soon be removed from GitHub workflows
6873
# Ubuntu 16.04 supports CUDA 8+
@@ -123,6 +128,7 @@ jobs:
123128
-DUSE_FBGEMM=${{ matrix.cpu }} \
124129
-DUSE_SENTENCEPIECE=on \
125130
-DUSE_STATIC_LIBS=on \
131+
-DPYMARIAN=${{ matrix.pymarian }} \
126132
127133
- name: Compile
128134
working-directory: build
@@ -146,11 +152,17 @@ jobs:
146152
ls -hlv $(find . -maxdepth 1 -type f -executable \( -name "marian*" -o -name "spm*" \))
147153
148154
- name: Install PyMarian
155+
if: matrix.pymarian == true
149156
working-directory: build
150157
env:
151158
CUDA_VERSION: ${{ matrix.cuda }}
152159
run: |
153-
python3 -m pip install --upgrade pip setuptools wheel pytest
154-
CMAKE_ARGS="" python3 -m pip install -v .
155-
python3 -m pymarian -v
156-
MARIAN_QUIET=YES python3 -m pytest -vs src/python/tests
160+
echo "Built wheels:" && ls -lh pymarian*.whl
161+
pytag=$(python3 -c 'import sys; x,y=sys.version_info[:2]; print(f"cp{x}{y}-{sys.platform}")')
162+
whl=$(echo pymarian*${pytag}*.whl)
163+
echo "Chosen wheel: $pytag :: $whl" && ls -lh $whl
164+
python3 -m pip install --upgrade pip pytest
165+
python3 -m pip install -v $whl
166+
python3 -m pymarian --version
167+
pymarian-eval --version
168+
pymarian --version

.github/workflows/windows.yml

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ jobs:
2020
- name: "Windows CPU-only"
2121
cuda: ""
2222
gpu: false
23+
pymarian: false
2324
# Windows CPU+GPU build
2425
- name: "Windows CPU+CUDA"
2526
cuda: "10.2"
2627
gpu: true
28+
pymarian: false
2729

2830
runs-on: windows-2019
2931
name: ${{ matrix.name }}
@@ -86,6 +88,7 @@ jobs:
8688
-DUSE_MPI="FALSE"
8789
-DUSE_NCCL="FALSE"
8890
-DUSE_SENTENCEPIECE="TRUE"
91+
-DPYMARIAN="${{ matrix.pymarian }}"
8992
-DUSE_STATIC_LIBS="TRUE"'
9093
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
9194
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
@@ -116,6 +119,7 @@ jobs:
116119
-DUSE_MPI="FALSE"
117120
-DUSE_NCCL="FALSE"
118121
-DUSE_SENTENCEPIECE="TRUE"
122+
-DPYMARIAN="${{ matrix.pymarian }}"
119123
-DUSE_STATIC_LIBS="TRUE"'
120124
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
121125
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
@@ -138,12 +142,20 @@ jobs:
138142
shell: cmd
139143

140144
- name: Install PyMarian
141-
working-directory: src/python
145+
if: matrix.pymarian == true
146+
working-directory: build/
142147
run: |
143-
python3 -m pip install --upgrade pip setuptools wheel pytest
144-
python3 -m pip install -v .
145-
python3 -m pymarian -v
146-
python3 -m pytest -vs src/python/tests
148+
echo "Built wheels:"
149+
ls pymarian*.whl
150+
$pytag = python3 -c 'import sys; x,y=sys.version_info[:2]; print(f"cp{x}{y}-{sys.platform}")'
151+
$whl = ls pymarian*$pytag*.whl
152+
echo "Chosen wheel: $pytag :: $whl"
153+
ls $whl
154+
python3 -m pip install --upgrade pip pytest
155+
python3 -m pip install -v $whl
156+
python3 -m pymarian --version
157+
pymarian-eval --version
158+
pymarian --version
147159
env:
148160
CUDA_VERSION: ${{ matrix.cuda }}
149-
shell: cmd
161+
shell: powershell

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ build
5050
build-*
5151
# pymarian wheels
5252
dist/
53+
tmp
54+
tmp-*
55+
tmp.*
5356

5457
# Examples
5558
examples/*/*.gz

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
- Updates to pymarian: building for multiple python versions; disabling tcmalloc; hosting gated COMETs on HuggingFace
11+
- Scripts for building _manylinux_ compatible wheel files (a requirement for publishing wheels on PyPI)
12+
- Add "pymarian" CLI, a proxy to "marian" binary, but made available in PATH after "pip install pymarian"
13+
1014
### Added
1115
- Faster ARM64 matmul in `mjdgemm` using `vdotq_s32` intrinsics
1216
- `mjdgemm` as a FBGEMM replacement, also SSE4.2 support and ARM support for 8bit avx512-style packed FBGEMM matrices

azure-pipelines.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ stages:
400400
- bash: |
401401
set -x
402402
python3 -m pip install build/pymarian-*.whl
403-
python3 -m pymarian -v
403+
python3 -m pymarian --version
404404
displayName: Build Pymarian
405405
condition: eq(variables.pymarian, true)
406406
@@ -548,7 +548,7 @@ stages:
548548
- bash: |
549549
set -x
550550
python3 -m pip install build/pymarian-*.whl
551-
python3 -m pymarian -v
551+
python3 -m pymarian --version
552552
displayName: Build Pymarian
553553
554554

azure-regression-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ stages:
144144
145145
- bash: |
146146
python3 -m pip install build/pymarian-*.whl
147-
python3 -m pymarian -v
147+
python3 -m pymarian --version
148148
python3 -m pip install pytest
149149
python3 -m pytest src/python/tests/regression
150150
displayName: Pymarian Install and Test

cmake/PythonModules.cmake

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,8 @@ macro(py_exec)
3434
endif()
3535
endmacro()
3636

37-
set(PYBIND11_NOPYTHON On)
38-
# this wont work if pybind11 is git submodule
39-
#find_package(pybind11 REQUIRED)
40-
37+
# NOTE: this property must be set before including pybind11
38+
# set(PYBIND11_NOPYTHON On)
4139
## =====================
4240
set(PYTHON_SEARCH_VERSIONS 3.7 3.8 3.9 3.10 3.11 3.12 3.13)
4341
set(PYTHON_DISABLE_VERSIONS "" CACHE STRING "")

src/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,9 @@ endif(GENERATE_MARIAN_INSTALL_TARGETS)
332332

333333

334334
if(PYMARIAN)
335-
# python libs which use different version of tcmalloc (e.g. pandas) can cause segfaults, so we disable it
335+
# this property must be set **before** including pybind11
336+
# otherwise pybind11 will interfere with our own python version detection
337+
set(PYBIND11_NOPYTHON On)
336338
include_directories(3rd_party/pybind11/include)
337339
add_subdirectory(3rd_party/pybind11)
338340
include(PythonModules)

src/command/marian_main.cpp

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,54 @@
3838
#include "marian_conv.cpp"
3939
#undef main
4040

41+
#include <map>
42+
#include <string>
43+
#include <tuple>
4144
#include "3rd_party/ExceptionWithCallStack.h"
45+
#include "3rd_party/spdlog/details/format.h"
4246

4347
int main(int argc, char** argv) {
4448
using namespace marian;
49+
using MainFunc = std::function<int(int, char**)>;
4550

46-
if(argc > 1 && argv[1][0] != '-') {
51+
std::map<std::string, std::tuple<MainFunc, std::string>> subcmds = {
52+
{"train", {mainTrainer, "Train a model (default)"}},
53+
{"decode", {mainDecoder, "Decode or translate text"}},
54+
{"score", {mainScorer, "Score translations"}},
55+
{"embed", {mainEmbedder, "Embed text"}},
56+
{"evaluate", {mainEvaluator, "Run Evaluator metric"}},
57+
{"vocab", {mainVocab, "Create vocabulary"}},
58+
{"convert", {mainConv, "Convert model file format"}}
59+
};
60+
// no arguments, or the first arg is "?", print help message
61+
if (argc == 1 || (argc == 2 && (std::string(argv[1]) == "?") )) {
62+
std::cout << "Usage: " << argv[0] << " COMMAND [ARGS]" << std::endl;
63+
std::cout << "Commands:" << std::endl;
64+
for (auto&& [name, val] : subcmds) {
65+
std::cerr << fmt::format("{:10} : {}\n", name, std::get<1>(val));
66+
}
67+
return 0;
68+
}
69+
70+
if (argc > 1 && argv[1][0] != '-') {
4771
std::string cmd = argv[1];
4872
argc--;
4973
argv[1] = argv[0];
5074
argv++;
51-
if(cmd == "train") return mainTrainer(argc, argv);
52-
else if(cmd == "decode") return mainDecoder(argc, argv);
53-
else if (cmd == "score") return mainScorer(argc, argv);
54-
else if (cmd == "embed") return mainEmbedder(argc, argv);
55-
else if (cmd == "evaluate") return mainEvaluator(argc, argv);
56-
else if (cmd == "vocab") return mainVocab(argc, argv);
57-
else if (cmd == "convert") return mainConv(argc, argv);
58-
std::cerr << "Command must be train, decode, score, embed, vocab, or convert." << std::endl;
59-
exit(1);
60-
} else
75+
// check if the command is known, and if so, call the respective function
76+
// c++20 has contains() for maps, but we're not there yet, so we use count()
77+
if (subcmds.count(cmd) > 0) {
78+
auto [func, desc] = subcmds[cmd];
79+
return func(argc, argv);
80+
}
81+
else {
82+
std::cerr << "Unknown command: " << cmd << ". Known commands are:" << std::endl;
83+
for (auto&& [name, val] : subcmds) {
84+
std::cerr << fmt::format("{:10} : {}\n", name, std::get<1>(val));
85+
}
86+
return 1;
87+
}
88+
}
89+
else
6190
return mainTrainer(argc, argv);
6291
}

0 commit comments

Comments
 (0)