Skip to content
This repository was archived by the owner on Jun 19, 2025. It is now read-only.

Commit 86c88c2

Browse files
authored
Merge pull request #3097 from lissyx/update-0.8
Update 0.8
2 parents 1b10a25 + 18c37b9 commit 86c88c2

File tree

148 files changed

+1763
-1730
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

148 files changed

+1763
-1730
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@
3232
/doc/.build/
3333
/doc/xml-c/
3434
/doc/xml-java/
35+
Dockerfile.build
36+
Dockerfile.train

.taskcluster.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ tasks:
4545
- "--login"
4646
- "-cxe"
4747
- >
48-
echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list &&
48+
echo "deb http://archive.ubuntu.com/ubuntu/ xenial-updates main" > /etc/apt/sources.list.d/xenial-updates.list &&
4949
apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
5050
adduser --system --home /home/build-user build-user &&
5151
cd /home/build-user/ &&
Lines changed: 75 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,73 @@
1+
# Please refer to the USING documentation, "Dockerfile for building from source"
2+
13
# Need the devel image variant because we need /usr/include/cudnn.h
2-
# for compiling libctc_decoder_with_kenlm.so
3-
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
4+
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
45

6+
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
7+
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
58

69
# >> START Install base software
710

811
# Get basic packages
912
RUN apt-get update && apt-get install -y --no-install-recommends \
1013
apt-utils \
14+
bash-completion \
1115
build-essential \
16+
ca-certificates \
17+
cmake \
1218
curl \
13-
wget \
19+
g++ \
20+
gcc \
1421
git \
22+
git-lfs \
23+
libbz2-dev \
24+
libboost-all-dev \
25+
libgsm1-dev \
26+
libltdl-dev \
27+
liblzma-dev \
28+
libmagic-dev \
29+
libpng-dev \
30+
libsox-fmt-mp3 \
31+
libsox-dev \
32+
locales \
33+
openjdk-8-jdk \
34+
pkg-config \
1535
python3 \
1636
python3-dev \
1737
python3-pip \
1838
python3-wheel \
1939
python3-numpy \
20-
libcurl3-dev \
21-
ca-certificates \
22-
gcc \
2340
sox \
24-
libsox-fmt-mp3 \
25-
htop \
26-
nano \
27-
cmake \
28-
libboost-all-dev \
29-
zlib1g-dev \
30-
libbz2-dev \
31-
liblzma-dev \
32-
locales \
33-
pkg-config \
34-
libpng-dev \
35-
libsox-dev \
36-
libmagic-dev \
37-
libgsm1-dev \
38-
libltdl-dev \
39-
openjdk-8-jdk \
40-
bash-completion \
41-
g++ \
42-
unzip
43-
44-
RUN ln -s -f /usr/bin/python3 /usr/bin/python
41+
unzip \
42+
wget \
43+
zlib1g-dev
4544

46-
# Install NCCL 2.2
47-
RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
45+
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
46+
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
4847

4948
# Install Bazel
50-
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
49+
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/2.0.0/bazel_2.0.0-linux-x86_64.deb"
5150
RUN dpkg -i bazel_*.deb
5251

53-
# Install CUDA CLI Tools
54-
RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0
55-
56-
# Install pip
57-
RUN wget https://bootstrap.pypa.io/get-pip.py && \
58-
python3 get-pip.py && \
59-
rm get-pip.py
60-
6152
# << END Install base software
6253

63-
64-
65-
6654
# >> START Configure Tensorflow Build
6755

6856
# Clone TensorFlow from Mozilla repo
6957
RUN git clone https://github.com/mozilla/tensorflow/
7058
WORKDIR /tensorflow
71-
RUN git checkout r1.15
72-
59+
RUN git checkout r2.2
7360

7461
# GPU Environment Setup
62+
ENV TF_NEED_ROCM 0
63+
ENV TF_NEED_OPENCL_SYCL 0
64+
ENV TF_NEED_OPENCL 0
7565
ENV TF_NEED_CUDA 1
76-
ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
77-
ENV TF_CUDA_VERSION 10.0
78-
ENV TF_CUDNN_VERSION 7
66+
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
67+
ENV TF_CUDA_VERSION 10.1
68+
ENV TF_CUDNN_VERSION 7.6
7969
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
80-
ENV TF_NCCL_VERSION 2.3
70+
ENV TF_NCCL_VERSION 2.4
8171

8272
# Common Environment Setup
8373
ENV TF_BUILD_CONTAINER_TYPE GPU
@@ -105,14 +95,12 @@ ENV TF_NEED_TENSORRT 0
10595
ENV TF_NEED_GDR 0
10696
ENV TF_NEED_VERBS 0
10797
ENV TF_NEED_OPENCL_SYCL 0
98+
10899
ENV PYTHON_BIN_PATH /usr/bin/python3.6
109-
ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
100+
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
110101

111102
# << END Configure Tensorflow Build
112103

113-
114-
115-
116104
# >> START Configure Bazel
117105

118106
# Running bazel inside a `docker build` command causes trouble, cf:
@@ -124,39 +112,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
124112
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
125113
>>/etc/bazel.bazelrc
126114

127-
# Put cuda libraries to where they are expected to be
128-
RUN mkdir /usr/local/cuda/lib && \
129-
ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
130-
ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
131-
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
132-
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
133-
134-
135-
# Set library paths
136-
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
137-
138115
# << END Configure Bazel
139116

117+
WORKDIR /
140118

141-
# Copy DeepSpeech repo contents to container's /DeepSpeech
142-
COPY . /DeepSpeech/
143-
144-
# Alternative clone from GitHub
145-
# RUN apt-get update && apt-get install -y git-lfs
146-
# WORKDIR /
147-
# RUN git lfs install
148-
# RUN git clone https://github.com/mozilla/DeepSpeech.git
149-
119+
RUN git clone $DEEPSPEECH_REPO
150120
WORKDIR /DeepSpeech
151-
152-
RUN DS_NODECODER=1 pip3 --no-cache-dir install .
121+
RUN git checkout $DEEPSPEECH_SHA
153122

154123
# Link DeepSpeech native_client libs to tf folder
155124
RUN ln -s /DeepSpeech/native_client /tensorflow
156125

157-
158-
159-
160126
# >> START Build and bind
161127

162128
WORKDIR /tensorflow
@@ -170,59 +136,60 @@ RUN ./configure
170136

171137
# Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
172138

173-
174139
# Build DeepSpeech
175-
RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
176-
177-
###
178-
### Using TensorFlow upstream should work
179-
###
180-
# # Build TF pip package
181-
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
182-
#
183-
# # Build wheel
184-
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
185-
#
186-
# # Install tensorflow from our custom wheel
187-
# RUN pip3 install /tmp/tensorflow_pkg/*.whl
140+
RUN bazel build \
141+
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
142+
--config=monolithic \
143+
--config=cuda \
144+
-c opt \
145+
--copt=-O3 \
146+
--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
147+
--copt=-mtune=generic \
148+
--copt=-march=x86-64 \
149+
--copt=-msse \
150+
--copt=-msse2 \
151+
--copt=-msse3 \
152+
--copt=-msse4.1 \
153+
--copt=-msse4.2 \
154+
--copt=-mavx \
155+
--copt=-fvisibility=hidden \
156+
//native_client:libdeepspeech.so \
157+
--verbose_failures \
158+
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
188159

189160
# Copy built libs to /DeepSpeech/native_client
190161
RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
191162

192-
# Install TensorFlow
193-
WORKDIR /DeepSpeech/
194-
RUN pip3 install tensorflow-gpu==1.15.0
195-
196-
197163
# Build client.cc and install Python client and decoder bindings
198164
ENV TFDIR /tensorflow
165+
166+
RUN nproc
167+
199168
WORKDIR /DeepSpeech/native_client
200-
RUN make deepspeech
169+
RUN make NUM_PROCESSES=$(nproc) deepspeech
201170

202171
WORKDIR /DeepSpeech
203-
RUN cd native_client/python && make bindings
172+
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
204173
RUN pip3 install --upgrade native_client/python/dist/*.whl
205174

206-
RUN cd native_client/ctcdecode && make bindings
175+
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
207176
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
208177

209-
210178
# << END Build and bind
211179

212-
213-
214-
215180
# Allow Python printing utf-8
216181
ENV PYTHONIOENCODING UTF-8
217182

218183
# Build KenLM in /DeepSpeech/native_client/kenlm folder
219184
WORKDIR /DeepSpeech/native_client
220-
RUN rm -rf kenlm \
221-
&& git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
222-
&& mkdir -p build \
223-
&& cd build \
224-
&& cmake .. \
225-
&& make -j 4
185+
RUN rm -rf kenlm && \
186+
git clone https://github.com/kpu/kenlm && \
187+
cd kenlm && \
188+
git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
189+
mkdir -p build && \
190+
cd build && \
191+
cmake .. && \
192+
make -j $(nproc)
226193

227194
# Done
228195
WORKDIR /DeepSpeech

Dockerfile.train.tmpl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
2+
3+
FROM tensorflow/tensorflow:1.15.2-gpu-py3
4+
ENV DEBIAN_FRONTEND=noninteractive
5+
6+
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
7+
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
8+
9+
RUN apt-get update && apt-get install -y --no-install-recommends \
10+
apt-utils \
11+
bash-completion \
12+
build-essential \
13+
curl \
14+
git \
15+
git-lfs \
16+
libbz2-dev \
17+
locales \
18+
python3-venv \
19+
unzip \
20+
wget
21+
22+
# We need to remove python3-xdg because it breaks the deepspeech install later with
23+
# weird errors about setuptools
24+
RUN apt-get purge -y python3-xdg
25+
26+
# Install dependencies for audio augmentation
27+
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
28+
29+
WORKDIR /
30+
RUN git lfs install
31+
RUN git clone $DEEPSPEECH_REPO
32+
33+
WORKDIR /DeepSpeech
34+
RUN git checkout $DEEPSPEECH_SHA
35+
36+
# Build CTC decoder first, to avoid clashes caused by incompatible version upgrades
37+
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
38+
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
39+
40+
# Prepare deps
41+
RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
42+
43+
# Install DeepSpeech
44+
# - No need for the decoder since we did it earlier
45+
# - There is already correct TensorFlow GPU installed on the base image,
46+
# we don't want to break that
47+
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
48+
49+
# Tool to convert output graph for inference
50+
RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
51+
--artifact convert_graphdef_memmapped_format --target .
52+
53+
RUN ./bin/run-ldc93s1.sh

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
2+
DEEPSPEECH_SHA ?= origin/master
3+
4+
Dockerfile%: Dockerfile%.tmpl
5+
sed \
6+
-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
7+
-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
8+
< $< > $@

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Project DeepSpeech
1414

1515
DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper <https://arxiv.org/abs/1412.5567>`_. Project DeepSpeech uses Google's `TensorFlow <https://www.tensorflow.org/>`_ to make the implementation easier.
1616

17-
Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.
17+
Documentation for installation, usage, and training models are available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.
1818

1919
For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
2020

bin/import_cv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def one_sample(sample):
9393
else:
9494
# This one is good - keep it for the target CSV
9595
rows.append((wav_filename, file_size, label))
96+
counter["imported_time"] += frames
9697
counter["all"] += 1
9798
counter["total_time"] += frames
9899
return (counter, rows)

bin/import_cv2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def one_sample(args):
7878
else:
7979
# This one is good - keep it for the target CSV
8080
rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
81+
counter["imported_time"] += frames
8182
counter["all"] += 1
8283
counter["total_time"] += frames
8384

bin/import_lingua_libre.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def one_sample(sample):
9191
else:
9292
# This one is good - keep it for the target CSV
9393
rows.append((wav_filename, file_size, label))
94+
counter["imported_time"] += frames
9495
counter["all"] += 1
9596
counter["total_time"] += frames
9697

bin/import_m-ailabs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def one_sample(sample):
9191
else:
9292
# This one is good - keep it for the target CSV
9393
rows.append((wav_filename, file_size, label))
94+
counter["imported_time"] += frames
9495
counter["all"] += 1
9596
counter["total_time"] += frames
9697
return (counter, rows)

0 commit comments

Comments
 (0)