Skip to content

Commit 723b3d6

Browse files
author
Dan Forsberg
committed
Switch to Arrow and Ubuntu packages
1 parent d44c88a commit 723b3d6

File tree

6 files changed: 82 additions (+82) and 190 deletions (-190)

build/Makefile.linux

Lines changed: 11 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -3,119 +3,39 @@ ROOT:=$(HERE)/../..
33
VTABLE:=$(ROOT)/parquet
44
SQLITE:=$(ROOT)/sqlite
55

6-
# Directories
7-
ARROW=$(HERE)/arrow
8-
ARROW_RELEASE=$(ARROW)/cpp/release
9-
BOOST_ROOT=$(ARROW_RELEASE)/boost_ep-prefix/src/boost_ep
10-
BOOST=$(BOOST_ROOT)/stage/lib
11-
BROTLI=$(ARROW_RELEASE)/brotli_ep/src/brotli_ep-install/lib/x86_64-linux-gnu
12-
ICU=$(HERE)/icu
13-
LZ4=$(ARROW_RELEASE)/lz4_ep-prefix/src/lz4_ep/lib
14-
PARQUET_CPP=$(HERE)/parquet-cpp
15-
SNAPPY=$(ARROW_RELEASE)/snappy_ep/src/snappy_ep-install/lib
16-
ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
17-
ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
18-
19-
# Libraries
20-
# profile_gen, profile_build for PGO
21-
APACHE_BUILD=release
22-
23-
ARROW_LIB = $(ARROW_RELEASE)/$(APACHE_BUILD)/libarrow.a
24-
BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
25-
BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
26-
BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
27-
BROTLI_COMMON_LIB = $(BROTLI)/libbrotlicommon.a
28-
BROTLI_DEC_LIB = $(BROTLI)/libbrotlidec.a
29-
BROTLI_ENC_LIB = $(BROTLI)/libbrotlienc.a
30-
ICU_I18N_LIB=$(ICU)/source/lib/libicui18n.a
31-
ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
32-
ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
33-
LZ4_LIB = $(LZ4)/liblz4.a
34-
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/$(APACHE_BUILD)/libparquet.a
35-
SNAPPY_LIB = $(SNAPPY)/libsnappy.a
36-
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
37-
ZLIB_LIB = $(ZLIB)/libz.a
38-
ZSTD_LIB = $(ZSTD)/libzstd.a
39-
406
# Flags
41-
CC = gcc
427
CXX = g++
438
OPTIMIZATIONS = -O3
44-
CPUS:=$(shell nproc)
45-
CFLAGS = -I $(SQLITE) -I $(PARQUET_CPP)/src -I $(ARROW)/cpp/src $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
9+
CFLAGS = -I $(SQLITE) $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
10+
LIBS = -lparquet -lboost_regex -lboost_system -lboost_filesystem \
11+
-lbrotlienc -lbrotlicommon -lbrotlidec -licui18n -licuuc -licudata \
12+
-llz4 -lsnappy -lthrift -lz -lzstd -lcrypto -lssl
4613

47-
ALL_LIBS = $(PARQUET_CPP_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
48-
$(ICU_I18N_LIB) $(ICU_UC_LIB) $(ICU_DATA_LIB) \
49-
$(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) $(BOOST_REGEX_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)
14+
LDFLAGS = $(OPTIMIZATIONS) -Wl,--no-whole-archive $(LIBS) -lz -lcrypto -lssl
5015

51-
LDFLAGS = $(OPTIMIZATIONS) \
52-
-Wl,--whole-archive $(ALL_LIBS) \
53-
-Wl,--no-whole-archive -lz -lcrypto -lssl
5416
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
55-
LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)
5617

5718
PROF =
5819

59-
libparquet.so: $(LIBS) $(OBJ)
20+
libparquet.so: $(OBJ)
6021
$(CXX) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)
6122

62-
parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
23+
parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h
6324
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
6425

65-
parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
26+
parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
6627
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
6728

68-
parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h $(ARROW) $(PARQUET_CPP)
29+
parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h
6930
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
7031

71-
parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
32+
parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
7233
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
7334

74-
$(ARROW):
75-
rm -rf $(ARROW)
76-
git clone https://github.com/apache/arrow.git $(ARROW)
77-
cd $(ARROW) && git checkout apache-arrow-0.9.0
78-
mkdir $(ARROW)/cpp/release
79-
cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF ..
80-
touch -d @0 $(ARROW)
81-
82-
$(ARROW_LIB): $(ARROW)
83-
cd $(ARROW)/cpp/release && make -j$(CPUS)
84-
85-
# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
86-
$(ICU_I18N_LIB):
87-
rm -rf $(ICU)
88-
mkdir $(ICU)
89-
cd $(ICU) && wget https://github.com/unicode-org/icu/releases/download/release-$(ICU_VERSION)/icu4c-$(ICU_VERSION_U)-src.tgz
90-
cd $(ICU) && tar xf icu4c-$(ICU_VERSION_U)-src.tgz --strip-components=1
91-
cd $(ICU)/source && ./configure --enable-static
92-
cd $(ICU)/source && make -j$(CPUS) LIBCFLAGS='-fPIC' LIBCXXFLAGS='-fPIC'
93-
94-
$(PARQUET_CPP):
95-
rm -rf $(PARQUET_CPP)
96-
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
97-
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
98-
cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF .
99-
touch -d @0 $(PARQUET_CPP)
100-
101-
$(PARQUET_CPP_LIB): $(PARQUET_CPP) $(ARROW_LIB)
102-
cd $(PARQUET_CPP) && make -j$(CPUS)
103-
104-
.PHONY: clean arrow icu parquet publish_libs
35+
# Targets that name commands rather than files. distclean is listed so that a
# stray file called `distclean` in the build directory cannot make the target
# look up to date and skip its recipe.
.PHONY: clean distclean parquet
10536

10637
clean:
10738
rm -f *.o *.so
10839

10940
distclean:
11041
rm -rf $(SQLITE) $(HERE)
111-
112-
113-
arrow: $(ARROW_LIB)
114-
115-
icu: $(ICU_I18N_LIB)
116-
117-
parquet: $(PARQUET_CPP_LIB)
118-
119-
publish_libs:
120-
tar -cJf libs.tar.xz $(ALL_LIBS) $(SQLITE)/sqlite3
121-
s3cmd put libs.tar.xz s3://cldellow/public/libparquet/$$(lsb_release -s -r)/libs.tar.xz

make-linux

Lines changed: 16 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,36 @@
11
#!/bin/bash
22
set -euo pipefail
33

4+
apt install -y sudo lsb-release wget
5+
46
here=$(dirname "${BASH_SOURCE[0]}")
57
here=$(readlink -f "$here")
6-
prebuilt="$here"/build/linux/prebuilt
78
ubuntu="$(lsb_release -s -r)"
8-
libs=(libarrow.a libboost_filesystem.a libboost_regex.a libboost_system.a libbrotlicommon.a libbrotlidec.a \
9-
libbrotlienc.a libicudata.a libicui18n.a libicuuc.a liblz4.a libparquet.a libsnappy.a libthrift.a libzstd.a)
10-
lib_locs=()
119

1210
setup_directories() {
1311
cd "$here"
1412
mkdir -p build/linux
15-
mkdir -p "$prebuilt"
1613
cp -f build/Makefile.linux build/linux/Makefile
1714
cd build/linux
1815
}
1916

2017
install_prerequisites() {
18+
# install Apache Arrow libs
19+
# NOTE: Pinned to Ubuntu Focal
20+
wget https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-focal.deb
21+
sudo apt install -y -V ./apache-arrow-archive-keyring-latest-focal.deb
22+
sudo apt update -y
23+
sudo apt install -y -V libparquet-dev liblz4-dev libzstd-dev libthrift-dev \
24+
libsnappy-dev libthrift-dev libbrotli-dev libz-dev
25+
2126
# Install prereqs based on https://github.com/apache/parquet-cpp#linux
22-
sudo apt-get install libboost-dev g++ libboost-filesystem-dev \
27+
sudo apt install -y libboost-dev g++ libboost-filesystem-dev \
2328
libboost-program-options-dev libboost-regex-dev \
2429
libboost-system-dev libboost-test-dev \
2530
libssl-dev libtool bison flex pkg-config libreadline-dev libncurses-dev
2631

2732
# Install prereqs based on https://github.com/apache/arrow/tree/master/cpp
28-
sudo apt-get install cmake \
33+
sudo apt install -y cmake \
2934
libboost-dev \
3035
libboost-filesystem-dev \
3136
libboost-system-dev
@@ -48,6 +53,9 @@ set_icu_version() {
4853
18.04)
4954
export ICU_VERSION=60-2
5055
;;
56+
20.10)
57+
export ICU_VERSION=67-1
58+
;;
5159
*)
5260
echo "unsure what libicu version to use" >&2
5361
exit 1
@@ -56,47 +64,11 @@ set_icu_version() {
5664
export ICU_VERSION_U=${ICU_VERSION//-/_}
5765
}
5866

59-
add_prebuilt_lib() {
60-
lib_locs+=("$1=$prebuilt/$2.a")
61-
}
62-
63-
fetch_prebuilt_libs() {
64-
if [ ! -e "$prebuilt"/complete ]; then
65-
(
66-
cd "$prebuilt"
67-
curl "https://s3.amazonaws.com/cldellow/public/libparquet/$ubuntu/libs.tar.xz" > libs.tar.xz
68-
tar xf libs.tar.xz --xform 's#.*/##'
69-
touch "$prebuilt"/complete
70-
)
71-
fi
72-
73-
if [ ! -e "$here"/sqlite/sqlite3 ]; then
74-
ln -s "$prebuilt"/sqlite3 "$here"/sqlite/sqlite3
75-
fi
76-
77-
add_prebuilt_lib "PARQUET_CPP_LIB" libparquet
78-
add_prebuilt_lib "LZ4_LIB" liblz4
79-
add_prebuilt_lib "ZSTD_LIB" libzstd
80-
add_prebuilt_lib "THRIFT_LIB" libthrift
81-
add_prebuilt_lib "SNAPPY_LIB" libsnappy
82-
add_prebuilt_lib "ARROW_LIB" libarrow
83-
add_prebuilt_lib "ICU_I18N_LIB" libicui18n
84-
add_prebuilt_lib "ICU_UC_LIB" libicuuc
85-
add_prebuilt_lib "ICU_DATA_LIB" libicudata
86-
add_prebuilt_lib "BROTLI_ENC_LIB" libbrotlienc
87-
add_prebuilt_lib "BROTLI_COMMON_LIB" libbrotlicommon
88-
add_prebuilt_lib "BROTLI_DEC_LIB" libbrotlidec
89-
add_prebuilt_lib "BOOST_REGEX_LIB" libboost_regex
90-
add_prebuilt_lib "BOOST_SYSTEM_LIB" libboost_system
91-
add_prebuilt_lib "BOOST_FILESYSTEM_LIB" libboost_filesystem
92-
93-
}
94-
9567
main() {
68+
set_icu_version
9669
setup_directories
9770
install_prerequisites
9871
build_sqlite
99-
set_icu_version
10072

10173
if [ -v PREBUILT ]; then
10274
fetch_prebuilt_libs

parquet/parquet.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ static int parquetColumn(
290290
case parquet::Type::BYTE_ARRAY:
291291
{
292292
parquet::ByteArray* rv = cursor->getByteArray(col);
293-
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
293+
if(cursor->getConvertedType(col) == parquet::ConvertedType::UTF8) {
294294
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
295295
} else {
296296
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);

0 commit comments

Comments
 (0)