From c0fd630e7a51efa8e7fe65b76d55f5fbcd71339c Mon Sep 17 00:00:00 2001 From: nsuberi Date: Tue, 1 May 2018 00:20:07 +0800 Subject: [PATCH 01/10] first pass at odbc connector - problem with acquiring postgresql drivers --- com_009_material_flow_databases/.gitignore | 6 +++ com_009_material_flow_databases/.sampleenv | 1 + com_009_material_flow_databases/Dockerfile | 41 +++++++++++++++++++ com_009_material_flow_databases/README.md | 22 ++++++++++ .../contents/main.py | 4 ++ .../contents/odbcinst.ini | 7 ++++ .../contents/src/__init__.py | 28 +++++++++++++ com_009_material_flow_databases/main.py | 4 ++ com_009_material_flow_databases/start.sh | 16 ++++++++ com_009_material_flow_databases/time.cron | 1 + 10 files changed, 130 insertions(+) create mode 100644 com_009_material_flow_databases/.gitignore create mode 100644 com_009_material_flow_databases/.sampleenv create mode 100644 com_009_material_flow_databases/Dockerfile create mode 100644 com_009_material_flow_databases/README.md create mode 100644 com_009_material_flow_databases/contents/main.py create mode 100644 com_009_material_flow_databases/contents/odbcinst.ini create mode 100644 com_009_material_flow_databases/contents/src/__init__.py create mode 100644 com_009_material_flow_databases/main.py create mode 100755 com_009_material_flow_databases/start.sh create mode 100644 com_009_material_flow_databases/time.cron diff --git a/com_009_material_flow_databases/.gitignore b/com_009_material_flow_databases/.gitignore new file mode 100644 index 00000000..0472744e --- /dev/null +++ b/com_009_material_flow_databases/.gitignore @@ -0,0 +1,6 @@ +*.env +*# +*.py[c|o] +*.DS_Store +data/* +credentials.json \ No newline at end of file diff --git a/com_009_material_flow_databases/.sampleenv b/com_009_material_flow_databases/.sampleenv new file mode 100644 index 00000000..60f6f5c8 --- /dev/null +++ b/com_009_material_flow_databases/.sampleenv @@ -0,0 +1 @@ +mfa_db_password=<> diff --git 
a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile new file mode 100644 index 00000000..5f0b2771 --- /dev/null +++ b/com_009_material_flow_databases/Dockerfile @@ -0,0 +1,41 @@ +FROM python:3.6 +MAINTAINER Nathan Suberi + +RUN apt-get update -y + +# Install core libraries for ODBC connection +RUN apt-get install -y unixodbc-dev unixodbc-bin unixodbc + +# https://github.com/mkleehammer/pyodbc +RUN pip install --upgrade pip && pip install pyodbc + +## Some attempts at manually installing the drivers fail +## https://blog.csdn.net/jollypigclub/article/details/46490541 +## https://www.cnblogs.com/he11o-liu/p/7503232.html +## https://odbc.postgresql.org/docs/unix-compilation.html + +#RUN apt-get install -y build-essential +#RUN cd /home +#RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz +#RUN gunzip psqlodbc-09.02.0100.tar.gz +#RUN tar xvf psqlodbc-09.02.0100.tar +#RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install + +# set name +ARG NAME=nrt-script +ENV NAME ${NAME} + +# copy the application folder inside the container +RUN mkdir -p /opt/$NAME/data +WORKDIR /opt/$NAME/ +COPY contents/ . + +# Set up ODBC driver info +RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini + +RUN useradd -r $NAME +RUN chown -R $NAME:$NAME /opt/$NAME +VOLUME /opt/$NAME/data +#USER $NAME + +CMD ["python", "main.py"] diff --git a/com_009_material_flow_databases/README.md b/com_009_material_flow_databases/README.md new file mode 100644 index 00000000..6af37d79 --- /dev/null +++ b/com_009_material_flow_databases/README.md @@ -0,0 +1,22 @@ +# gee-test + +Test NRT script for uploading to GEE. + +# Run + +Copy `.env.sample` to `.env` and enter account credentials. Copy GCS service account credential file to `credentials.json`. + +`./start.sh` Build docker and run once. + +# Modify + +`start.sh` Edit script name / Docker image name. + +`contents/` Copied into container. 
+ +`contents/src/__init__.py` Main application script. + +`contents/src/eeUtil/` Utility module for interacting with GEE. + +`time.cron` Edit cron frequency. + diff --git a/com_009_material_flow_databases/contents/main.py b/com_009_material_flow_databases/contents/main.py new file mode 100644 index 00000000..eecdfb9a --- /dev/null +++ b/com_009_material_flow_databases/contents/main.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +if __name__ == '__main__': + import src + src.main() diff --git a/com_009_material_flow_databases/contents/odbcinst.ini b/com_009_material_flow_databases/contents/odbcinst.ini new file mode 100644 index 00000000..07ca1c8e --- /dev/null +++ b/com_009_material_flow_databases/contents/odbcinst.ini @@ -0,0 +1,7 @@ +[PostgreSQL Unicode] +Description = PostgreSQL ODBC driver (Unicode version) +Driver = psqlodbcw.so +Setup = libodbcpsqlS.so +Debug = 0 +CommLog = 1 +UsageCount = 2 diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py new file mode 100644 index 00000000..d171f594 --- /dev/null +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -0,0 +1,28 @@ +from __future__ import unicode_literals + +import os +import sys +import logging + +import pyodbc + +# constants for bleaching alerts +SOURCE_URL = 'vps348928.ovh.net' +PORT = '5432' +DATABASE = 'mfa' +USER = 'mfa' +PASSWORD = os.environ.get('mfa_db_password') + +CONNECTION_STRING = 'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}' +cnxn = CONNECTION_STRING.format('{PostgreSQL Unicode}',SOURCE_URL, PORT, DATABASE, USER, PASSWORD) + +def main(): + '''Ingest new data into EE and delete old data''' + logging.basicConfig(stream=sys.stderr, level=logging.INFO) + logging.info('STARTING') + + # Initialize pyodbc + logging.info('Connection string: {}'.format(cnxn)) + myconnection = pyodbc.connect(cnxn, autocommit=True) + + logging.info('SUCCESS') diff --git a/com_009_material_flow_databases/main.py
b/com_009_material_flow_databases/main.py new file mode 100644 index 00000000..eecdfb9a --- /dev/null +++ b/com_009_material_flow_databases/main.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +if __name__ == '__main__': + import src + src.main() diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh new file mode 100755 index 00000000..1420add9 --- /dev/null +++ b/com_009_material_flow_databases/start.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +#Change the NAME variable with the name of your script +NAME=$(basename $(pwd)) +LOG=${LOG:-udp://localhost} + +docker build -t $NAME --build-arg NAME=$NAME . +docker run -it --log-driver=syslog \ + --log-opt syslog-address=$LOG \ + --log-opt tag=$NAME \ + --env-file .env \ + --rm $NAME \ + python main.py + + #/bin/bash + diff --git a/com_009_material_flow_databases/time.cron b/com_009_material_flow_databases/time.cron new file mode 100644 index 00000000..e72b2d58 --- /dev/null +++ b/com_009_material_flow_databases/time.cron @@ -0,0 +1 @@ +0 0 0 0 * From e5b5dc2a85d34ee35d2c7aad0f73c3dc831f9e7e Mon Sep 17 00:00:00 2001 From: nsuberi Date: Tue, 1 May 2018 01:44:11 +0800 Subject: [PATCH 02/10] provide test Dockerfile using Conda --- com_009_material_flow_databases/Dockerfile | 13 +++++++------ com_009_material_flow_databases/start.sh | 1 - com_009_material_flow_databases/test/Dockerfile | 10 ++++++++++ 3 files changed, 17 insertions(+), 7 deletions(-) create mode 100644 com_009_material_flow_databases/test/Dockerfile diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index 5f0b2771..a5dfe8eb 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -14,12 +14,11 @@ RUN pip install --upgrade pip && pip install pyodbc ## https://www.cnblogs.com/he11o-liu/p/7503232.html ## https://odbc.postgresql.org/docs/unix-compilation.html -#RUN apt-get install -y build-essential -#RUN cd /home -#RUN wget 
https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz -#RUN gunzip psqlodbc-09.02.0100.tar.gz -#RUN tar xvf psqlodbc-09.02.0100.tar -#RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install +RUN apt-get install -y build-essential +RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz +RUN gunzip psqlodbc-09.02.0100.tar.gz +RUN tar xvf psqlodbc-09.02.0100.tar +## RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install # set name ARG NAME=nrt-script @@ -32,6 +31,8 @@ COPY contents/ . # Set up ODBC driver info RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini +RUN cat /etc/odbcinst.ini +RUN odbcinst -j RUN useradd -r $NAME RUN chown -R $NAME:$NAME /opt/$NAME diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh index 1420add9..440cea56 100755 --- a/com_009_material_flow_databases/start.sh +++ b/com_009_material_flow_databases/start.sh @@ -13,4 +13,3 @@ docker run -it --log-driver=syslog \ python main.py #/bin/bash - diff --git a/com_009_material_flow_databases/test/Dockerfile b/com_009_material_flow_databases/test/Dockerfile new file mode 100644 index 00000000..0acfc49e --- /dev/null +++ b/com_009_material_flow_databases/test/Dockerfile @@ -0,0 +1,10 @@ +FROM continuumio/miniconda3 + +RUN conda install pyodbc + +RUN apt-get update -y +RUN apt-get install -y build-essential +RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz +RUN gunzip psqlodbc-09.02.0100.tar.gz +RUN tar xvf psqlodbc-09.02.0100.tar +RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install From 0550020ef60db9350a5543dd995d0e1bb9b621e2 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Wed, 2 May 2018 23:07:53 +0800 Subject: [PATCH 03/10] wip got connection to go through --- com_009_material_flow_databases/Dockerfile | 30 +++++-------- .../contents/odbcinst.ini | 2 +- .../contents/queries.py | 28 
+++++++++++++ .../contents/src/__init__.py | 14 ++++++- com_009_material_flow_databases/start.sh | 5 ++- .../test/Dockerfile | 10 ----- .../test/Dockerfile_without_conda | 42 +++++++++++++++++++ 7 files changed, 96 insertions(+), 35 deletions(-) create mode 100644 com_009_material_flow_databases/contents/queries.py delete mode 100644 com_009_material_flow_databases/test/Dockerfile create mode 100644 com_009_material_flow_databases/test/Dockerfile_without_conda diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index a5dfe8eb..0c39ddfc 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -1,39 +1,29 @@ -FROM python:3.6 -MAINTAINER Nathan Suberi +FROM continuumio/miniconda3 +# Install necessary libraries +RUN conda install pyodbc RUN apt-get update -y +RUN apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc libpq-dev -# Install core libraries for ODBC connection -RUN apt-get install -y unixodbc-dev unixodbc-bin unixodbc - -# https://github.com/mkleehammer/pyodbc -RUN pip install --upgrade pip && pip install pyodbc - -## Some attempts at manually installing the drivers fail -## https://blog.csdn.net/jollypigclub/article/details/46490541 -## https://www.cnblogs.com/he11o-liu/p/7503232.html -## https://odbc.postgresql.org/docs/unix-compilation.html - -RUN apt-get install -y build-essential +# Configure postgresql drivers RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz RUN gunzip psqlodbc-09.02.0100.tar.gz RUN tar xvf psqlodbc-09.02.0100.tar -## RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install +RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install -# set name -ARG NAME=nrt-script +# Provide name of container +ARG NAME=com_009_connector ENV NAME ${NAME} -# copy the application folder inside the container +# Copy the application folder inside the 
container RUN mkdir -p /opt/$NAME/data WORKDIR /opt/$NAME/ COPY contents/ . # Set up ODBC driver info RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini -RUN cat /etc/odbcinst.ini -RUN odbcinst -j +# Restrict permissions RUN useradd -r $NAME RUN chown -R $NAME:$NAME /opt/$NAME VOLUME /opt/$NAME/data diff --git a/com_009_material_flow_databases/contents/odbcinst.ini b/com_009_material_flow_databases/contents/odbcinst.ini index 07ca1c8e..4eeaf09d 100644 --- a/com_009_material_flow_databases/contents/odbcinst.ini +++ b/com_009_material_flow_databases/contents/odbcinst.ini @@ -1,6 +1,6 @@ [PostgreSQL Unicode] Description = PostgreSQL ODBC driver (Unicode version) -Driver = psqlodbcw.so +Driver = /usr/local/lib/psqlodbcw.so Setup = libodbcpsqlS.so Debug = 0 CommLog = 1 diff --git a/com_009_material_flow_databases/contents/queries.py b/com_009_material_flow_databases/contents/queries.py new file mode 100644 index 00000000..6ec1688f --- /dev/null +++ b/com_009_material_flow_databases/contents/queries.py @@ -0,0 +1,28 @@ +FlowMFA = ''' +SELECT c.Name AS Country, d.Country AS ISOAlpha3, f.Name AS Flow, m2.Name AS MFA13, m.Name AS MFA4, d.Year AS Year, d.Amount AS Amount + FROM FlowMFA d LEFT JOIN Country c ON d.Country = c.Code + LEFT JOIN Flow f ON d.Flow = f.Code + LEFT JOIN MFA13 m2 ON d.MFA13 = m2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + ORDER BY Flow, Year, MFA4, Country, MFA13; + ''' + + +FlowDetailed = ''' +SELECT d.Year AS Year, c1.Name AS OriginCountry, d.Source AS OriginISOAlpha3, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, p.Name AS ProductGroup, d.Amount AS Amount + FROM FlowDetailed d LEFT JOIN Country c1 ON d.Source = c1.Code + LEFT JOIN Country c2 ON d.Destination = c2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + LEFT JOIN Productgroup p ON d.ProductGroup = p.Code + ORDER BY Year, MFA4, ConsumerCountry, ProductGroup, OriginCountry; + ''' + +Footprint = ''' +SELECT d.Year AS Year, c2.Name AS ConsumerCountry, d.Destination 
AS ConsumerISOAlpha3, m.Name AS MFA4, sum(d.Amount) AS Amount + FROM FlowDetailed d + LEFT JOIN Country c2 ON d.Destination = c2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + LEFT JOIN Productgroup p ON d.ProductGroup = p.Code + GROUP BY Year, MFA4, ConsumerCountry + ORDER BY Year, MFA4, ConsumerCountry; + ''' diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index d171f594..e5b724ee 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -6,6 +6,8 @@ import pyodbc +import queries as q + # constants for bleaching alerts SOURCE_URL = 'vps348928.ovh.net' PORT = '5432' @@ -14,7 +16,7 @@ PASSWORD = os.environ.get('mfa_db_password') CONNECTION_STRING = 'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}' -cnxn = CONNECTION_STRING.format('{PostgreSQL Unicode}',SOURCE_URL, PORT, DATABASE, USER, PASSWORD) +cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}',SOURCE_URL, PORT, DATABASE, USER, PASSWORD) def main(): '''Ingest new data into EE and delete old data''' @@ -23,6 +25,14 @@ def main(): # Initialize pyodbc logging.info('Connection string: {}'.format(cnxn)) - myconnection = pyodbc.connect(cnxn, autocommit=True) + cnxn = pyodbc.connect(cnxnstr, autocommit=True) + cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8') + cnxn.setencoding(encoding='utf-8') + + cursor = cnxn.cursor() + + print(q.FlowMFA) + + #cursor.execute("select * from tmp").fetchone() logging.info('SUCCESS') diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh index 440cea56..4bdfec85 100755 --- a/com_009_material_flow_databases/start.sh +++ b/com_009_material_flow_databases/start.sh @@ -10,6 +10,7 @@ docker run -it --log-driver=syslog \ --log-opt tag=$NAME \ --env-file .env \ --rm $NAME \ - python main.py + /bin/bash - #/bin/bash + #python main.py + diff --git 
a/com_009_material_flow_databases/test/Dockerfile b/com_009_material_flow_databases/test/Dockerfile deleted file mode 100644 index 0acfc49e..00000000 --- a/com_009_material_flow_databases/test/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM continuumio/miniconda3 - -RUN conda install pyodbc - -RUN apt-get update -y -RUN apt-get install -y build-essential -RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz -RUN gunzip psqlodbc-09.02.0100.tar.gz -RUN tar xvf psqlodbc-09.02.0100.tar -RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install diff --git a/com_009_material_flow_databases/test/Dockerfile_without_conda b/com_009_material_flow_databases/test/Dockerfile_without_conda new file mode 100644 index 00000000..a5dfe8eb --- /dev/null +++ b/com_009_material_flow_databases/test/Dockerfile_without_conda @@ -0,0 +1,42 @@ +FROM python:3.6 +MAINTAINER Nathan Suberi + +RUN apt-get update -y + +# Install core libraries for ODBC connection +RUN apt-get install -y unixodbc-dev unixodbc-bin unixodbc + +# https://github.com/mkleehammer/pyodbc +RUN pip install --upgrade pip && pip install pyodbc + +## Some attempts at manually installing the drivers fail +## https://blog.csdn.net/jollypigclub/article/details/46490541 +## https://www.cnblogs.com/he11o-liu/p/7503232.html +## https://odbc.postgresql.org/docs/unix-compilation.html + +RUN apt-get install -y build-essential +RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz +RUN gunzip psqlodbc-09.02.0100.tar.gz +RUN tar xvf psqlodbc-09.02.0100.tar +## RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install + +# set name +ARG NAME=nrt-script +ENV NAME ${NAME} + +# copy the application folder inside the container +RUN mkdir -p /opt/$NAME/data +WORKDIR /opt/$NAME/ +COPY contents/ . 
+ +# Set up ODBC driver info +RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini +RUN cat /etc/odbcinst.ini +RUN odbcinst -j + +RUN useradd -r $NAME +RUN chown -R $NAME:$NAME /opt/$NAME +VOLUME /opt/$NAME/data +#USER $NAME + +CMD ["python", "main.py"] From 32eee0cbdd7d83adbf94f4d4f7604fbeb0472e2f Mon Sep 17 00:00:00 2001 From: nsuberi Date: Wed, 2 May 2018 23:21:28 +0800 Subject: [PATCH 04/10] connection proven to work w/ Countries table - running the larger queries may take some time --- .../contents/src/__init__.py | 38 ++++++++++++++++--- com_009_material_flow_databases/start.sh | 5 +-- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index e5b724ee..44e86e6c 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -6,7 +6,36 @@ import pyodbc -import queries as q +#from . import queries as q +q = {} +q['FlowMFA'] = ''' +SELECT c.Name AS Country, d.Country AS ISOAlpha3, f.Name AS Flow, m2.Name AS MFA13, m.Name AS MFA4, d.Year AS Year, d.Amount AS Amount + FROM FlowMFA d LEFT JOIN Country c ON d.Country = c.Code + LEFT JOIN Flow f ON d.Flow = f.Code + LEFT JOIN MFA13 m2 ON d.MFA13 = m2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + ORDER BY Flow, Year, MFA4, Country, MFA13; + ''' + + +q['FlowDetailed'] = ''' +SELECT d.Year AS Year, c1.Name AS OriginCountry, d.Source AS OriginISOAlpha3, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, p.Name AS ProductGroup, d.Amount AS Amount + FROM FlowDetailed d LEFT JOIN Country c1 ON d.Source = c1.Code + LEFT JOIN Country c2 ON d.Destination = c2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + LEFT JOIN Productgroup p ON d.ProductGroup = p.Code + ORDER BY Year, MFA4, ConsumerCountry, ProductGroup, OriginCountry; + ''' + +q['Footprint'] = ''' +SELECT d.Year AS Year, c2.Name AS ConsumerCountry, 
d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, sum(d.Amount) AS Amount + FROM FlowDetailed d + LEFT JOIN Country c2 ON d.Destination = c2.Code + LEFT JOIN MFA4 m ON d.MFA4 = m.Code + LEFT JOIN Productgroup p ON d.ProductGroup = p.Code + GROUP BY Year, MFA4, ConsumerCountry + ORDER BY Year, MFA4, ConsumerCountry; + ''' # constants for bleaching alerts SOURCE_URL = 'vps348928.ovh.net' @@ -24,15 +53,14 @@ def main(): logging.info('STARTING') # Initialize pyodbc - logging.info('Connection string: {}'.format(cnxn)) + logging.info('Connection string: {}'.format(cnxnstr)) cnxn = pyodbc.connect(cnxnstr, autocommit=True) cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8') cnxn.setencoding(encoding='utf-8') cursor = cnxn.cursor() - print(q.FlowMFA) - - #cursor.execute("select * from tmp").fetchone() + for row in cursor.execute('SELECT * FROM Country').fetchall(): + logging.info(row) logging.info('SUCCESS') diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh index 4bdfec85..440cea56 100755 --- a/com_009_material_flow_databases/start.sh +++ b/com_009_material_flow_databases/start.sh @@ -10,7 +10,6 @@ docker run -it --log-driver=syslog \ --log-opt tag=$NAME \ --env-file .env \ --rm $NAME \ - /bin/bash + python main.py - #python main.py - + #/bin/bash From abb75f9bf44301d75b5cddce27cc833cb7ec1f19 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Fri, 4 May 2018 01:34:17 +0800 Subject: [PATCH 05/10] wip untested - uploading flowmfa and flowdetailed to carto account --- com_009_material_flow_databases/.sampleenv | 3 + com_009_material_flow_databases/Dockerfile | 4 + .../{test => }/Dockerfile_without_conda | 5 +- .../contents/queries.py | 28 ------- .../contents/src/__init__.py | 75 +++++++++---------- 5 files changed, 43 insertions(+), 72 deletions(-) rename com_009_material_flow_databases/{test => }/Dockerfile_without_conda (84%) delete mode 100644 com_009_material_flow_databases/contents/queries.py diff --git 
a/com_009_material_flow_databases/.sampleenv b/com_009_material_flow_databases/.sampleenv index 60f6f5c8..e3f6829d 100644 --- a/com_009_material_flow_databases/.sampleenv +++ b/com_009_material_flow_databases/.sampleenv @@ -1 +1,4 @@ mfa_db_password=<> + +carto_user=<> +carto_password=<> diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index 0c39ddfc..cb75f2b3 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -11,6 +11,10 @@ RUN gunzip psqlodbc-09.02.0100.tar.gz RUN tar xvf psqlodbc-09.02.0100.tar RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install +# Provide pandas +RUN conda install pandas +RUN conda install cartoframes + # Provide name of container ARG NAME=com_009_connector ENV NAME ${NAME} diff --git a/com_009_material_flow_databases/test/Dockerfile_without_conda b/com_009_material_flow_databases/Dockerfile_without_conda similarity index 84% rename from com_009_material_flow_databases/test/Dockerfile_without_conda rename to com_009_material_flow_databases/Dockerfile_without_conda index a5dfe8eb..52b7f055 100644 --- a/com_009_material_flow_databases/test/Dockerfile_without_conda +++ b/com_009_material_flow_databases/Dockerfile_without_conda @@ -4,7 +4,7 @@ MAINTAINER Nathan Suberi RUN apt-get update -y # Install core libraries for ODBC connection -RUN apt-get install -y unixodbc-dev unixodbc-bin unixodbc +RUN apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc # https://github.com/mkleehammer/pyodbc RUN pip install --upgrade pip && pip install pyodbc @@ -14,11 +14,10 @@ RUN pip install --upgrade pip && pip install pyodbc ## https://www.cnblogs.com/he11o-liu/p/7503232.html ## https://odbc.postgresql.org/docs/unix-compilation.html -RUN apt-get install -y build-essential RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz RUN gunzip psqlodbc-09.02.0100.tar.gz RUN tar xvf 
psqlodbc-09.02.0100.tar -## RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install +RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install # set name ARG NAME=nrt-script diff --git a/com_009_material_flow_databases/contents/queries.py b/com_009_material_flow_databases/contents/queries.py deleted file mode 100644 index 6ec1688f..00000000 --- a/com_009_material_flow_databases/contents/queries.py +++ /dev/null @@ -1,28 +0,0 @@ -FlowMFA = ''' -SELECT c.Name AS Country, d.Country AS ISOAlpha3, f.Name AS Flow, m2.Name AS MFA13, m.Name AS MFA4, d.Year AS Year, d.Amount AS Amount - FROM FlowMFA d LEFT JOIN Country c ON d.Country = c.Code - LEFT JOIN Flow f ON d.Flow = f.Code - LEFT JOIN MFA13 m2 ON d.MFA13 = m2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - ORDER BY Flow, Year, MFA4, Country, MFA13; - ''' - - -FlowDetailed = ''' -SELECT d.Year AS Year, c1.Name AS OriginCountry, d.Source AS OriginISOAlpha3, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, p.Name AS ProductGroup, d.Amount AS Amount - FROM FlowDetailed d LEFT JOIN Country c1 ON d.Source = c1.Code - LEFT JOIN Country c2 ON d.Destination = c2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - LEFT JOIN Productgroup p ON d.ProductGroup = p.Code - ORDER BY Year, MFA4, ConsumerCountry, ProductGroup, OriginCountry; - ''' - -Footprint = ''' -SELECT d.Year AS Year, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, sum(d.Amount) AS Amount - FROM FlowDetailed d - LEFT JOIN Country c2 ON d.Destination = c2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - LEFT JOIN Productgroup p ON d.ProductGroup = p.Code - GROUP BY Year, MFA4, ConsumerCountry - ORDER BY Year, MFA4, ConsumerCountry; - ''' diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index 44e86e6c..0525e200 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ 
b/com_009_material_flow_databases/contents/src/__init__.py @@ -4,48 +4,25 @@ import sys import logging -import pyodbc - -#from . import queries as q -q = {} -q['FlowMFA'] = ''' -SELECT c.Name AS Country, d.Country AS ISOAlpha3, f.Name AS Flow, m2.Name AS MFA13, m.Name AS MFA4, d.Year AS Year, d.Amount AS Amount - FROM FlowMFA d LEFT JOIN Country c ON d.Country = c.Code - LEFT JOIN Flow f ON d.Flow = f.Code - LEFT JOIN MFA13 m2 ON d.MFA13 = m2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - ORDER BY Flow, Year, MFA4, Country, MFA13; - ''' - - -q['FlowDetailed'] = ''' -SELECT d.Year AS Year, c1.Name AS OriginCountry, d.Source AS OriginISOAlpha3, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, p.Name AS ProductGroup, d.Amount AS Amount - FROM FlowDetailed d LEFT JOIN Country c1 ON d.Source = c1.Code - LEFT JOIN Country c2 ON d.Destination = c2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - LEFT JOIN Productgroup p ON d.ProductGroup = p.Code - ORDER BY Year, MFA4, ConsumerCountry, ProductGroup, OriginCountry; - ''' +from datetime import datetime -q['Footprint'] = ''' -SELECT d.Year AS Year, c2.Name AS ConsumerCountry, d.Destination AS ConsumerISOAlpha3, m.Name AS MFA4, sum(d.Amount) AS Amount - FROM FlowDetailed d - LEFT JOIN Country c2 ON d.Destination = c2.Code - LEFT JOIN MFA4 m ON d.MFA4 = m.Code - LEFT JOIN Productgroup p ON d.ProductGroup = p.Code - GROUP BY Year, MFA4, ConsumerCountry - ORDER BY Year, MFA4, ConsumerCountry; - ''' +import pyodbc +import pandas as pd +import cartoframes -# constants for bleaching alerts -SOURCE_URL = 'vps348928.ovh.net' -PORT = '5432' -DATABASE = 'mfa' -USER = 'mfa' -PASSWORD = os.environ.get('mfa_db_password') +# ODBC Connection details -- these can be pulled out into an odbc.ini file +ODBC_SOURCE_URL = 'vps348928.ovh.net' +ODBC_PORT = '5432' +ODBC_DATABASE = 'mfa' +OBDC_USER = 'mfa' +ODBC_PASSWORD = os.environ.get('mfa_db_password') CONNECTION_STRING = 
'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}' -cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}',SOURCE_URL, PORT, DATABASE, USER, PASSWORD) +cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}', ODBC_SOURCE_URL, ODBC_PORT, ODBC_DATABASE, ODBC_USER, ODBC_PASSWORD) + +# Carto Connection details +CARTO_USER = os.environ.get('carto_user') +CARTO_PASSWORD = os.environ.get('carto_password') def main(): '''Ingest new data into EE and delete old data''' @@ -57,10 +34,26 @@ def main(): cnxn = pyodbc.connect(cnxnstr, autocommit=True) cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8') cnxn.setencoding(encoding='utf-8') - cursor = cnxn.cursor() - for row in cursor.execute('SELECT * FROM Country').fetchall(): - logging.info(row) + # Fetch data + before = datetime.now() + flowmfa = pd.DataFrame(cursor.execute('SELECT * FROM FlowMFA').fetchall()) + logging.info('Shape of df is: {}'.format(countries.shape)) + after = datetime.now() + logging.info("FlowMFA query takes {}".format(after-before)) + + before = datetime.now() + flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall()) + logging.info('Shape of df is: {}'.format(countries.shape)) + after = datetime.now() + logging.info("FlowDetailed query takes {}".format(after-before)) + + # Authenticate to carto + cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER), + api_key=CARTO_PASSWORD) + + cc.write(flowmfa, 'com_009_flowmfa', overwrite=True) + cc.write(flowdetailed, 'com_009_flowdetailed', overwrite=True) logging.info('SUCCESS') From f2149f421e4a7781748268d7b16eb3eb7f314753 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Fri, 4 May 2018 01:41:51 +0800 Subject: [PATCH 06/10] wip - add back in countries query to prove this works, small cleanups --- .../contents/src/__init__.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/com_009_material_flow_databases/contents/src/__init__.py 
b/com_009_material_flow_databases/contents/src/__init__.py index 0525e200..19e2e7a0 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -1,9 +1,7 @@ -from __future__ import unicode_literals - +# Import libraries import os import sys import logging - from datetime import datetime import pyodbc @@ -25,7 +23,6 @@ CARTO_PASSWORD = os.environ.get('carto_password') def main(): - '''Ingest new data into EE and delete old data''' logging.basicConfig(stream=sys.stderr, level=logging.INFO) logging.info('STARTING') @@ -37,19 +34,27 @@ def main(): cursor = cnxn.cursor() # Fetch data + logging.info("DEMO - run query for countries table to prove this works") before = datetime.now() - flowmfa = pd.DataFrame(cursor.execute('SELECT * FROM FlowMFA').fetchall()) + countries = pd.DataFrame(cursor.execute('SELECT * FROM Countries').fetchall()) logging.info('Shape of df is: {}'.format(countries.shape)) after = datetime.now() + logging.info("Countries query takes {}".format(after-before)) + + logging.info("PROCESS THE meat and POTATOES") + before = datetime.now() + flowmfa = pd.DataFrame(cursor.execute('SELECT * FROM FlowMFA').fetchall()) + logging.info('Shape of df is: {}'.format(flowmfa.shape)) + after = datetime.now() logging.info("FlowMFA query takes {}".format(after-before)) before = datetime.now() flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall()) - logging.info('Shape of df is: {}'.format(countries.shape)) + logging.info('Shape of df is: {}'.format(flowdetailed.shape)) after = datetime.now() logging.info("FlowDetailed query takes {}".format(after-before)) - # Authenticate to carto + # Authenticate to carto and upload data cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER), api_key=CARTO_PASSWORD) From 12f70d2edd9677b5d059d0ffb7fee0bdcf62cf13 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Fri, 4 May 2018 01:48:54 +0800 Subject: [PATCH 
07/10] wip big queries still take long - use pip to install cartoframes, fix some typos --- com_009_material_flow_databases/Dockerfile | 2 +- com_009_material_flow_databases/contents/src/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index cb75f2b3..ff249368 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -13,7 +13,7 @@ RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make ins # Provide pandas RUN conda install pandas -RUN conda install cartoframes +RUN pip install cartoframes # Provide name of container ARG NAME=com_009_connector diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index 19e2e7a0..d9e4b314 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -12,7 +12,7 @@ ODBC_SOURCE_URL = 'vps348928.ovh.net' ODBC_PORT = '5432' ODBC_DATABASE = 'mfa' -OBDC_USER = 'mfa' +ODBC_USER = 'mfa' ODBC_PASSWORD = os.environ.get('mfa_db_password') CONNECTION_STRING = 'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}' @@ -36,7 +36,7 @@ def main(): # Fetch data logging.info("DEMO - run query for countries table to prove this works") before = datetime.now() - countries = pd.DataFrame(cursor.execute('SELECT * FROM Countries').fetchall()) + countries = pd.DataFrame(cursor.execute('SELECT * FROM Country').fetchall()) logging.info('Shape of df is: {}'.format(countries.shape)) after = datetime.now() logging.info("Countries query takes {}".format(after-before)) From 0265d6966ab901624462d9e9f9f5fe539366e68a Mon Sep 17 00:00:00 2001 From: nsuberi Date: Mon, 7 May 2018 13:19:18 +0800 Subject: [PATCH 08/10] cleaned up dockerfile and __init__.py --- com_009_material_flow_databases/Dockerfile | 22 ++--- 
.../contents/src/__init__.py | 98 ++++++++++++++----- com_009_material_flow_databases/start.sh | 11 ++- 3 files changed, 90 insertions(+), 41 deletions(-) diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index ff249368..56f76893 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -1,9 +1,13 @@ FROM continuumio/miniconda3 +MAINTAINER Nathan Suberi + +# Provide name of container +ARG NAME # Install necessary libraries -RUN conda install pyodbc -RUN apt-get update -y -RUN apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc libpq-dev +RUN apt-get update -y && apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc libpq-dev +RUN conda update -n base conda && conda install pyodbc pandas +RUN pip install cartoframes && pip uninstall -y tqdm && pip install tqdm==4.20.0 # Configure postgresql drivers RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz @@ -11,16 +15,9 @@ RUN gunzip psqlodbc-09.02.0100.tar.gz RUN tar xvf psqlodbc-09.02.0100.tar RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install -# Provide pandas -RUN conda install pandas -RUN pip install cartoframes - -# Provide name of container -ARG NAME=com_009_connector -ENV NAME ${NAME} - # Copy the application folder inside the container RUN mkdir -p /opt/$NAME/data +VOLUME /opt/$NAME/data WORKDIR /opt/$NAME/ COPY contents/ . 
@@ -30,7 +27,6 @@ RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini # Restrict permissions RUN useradd -r $NAME RUN chown -R $NAME:$NAME /opt/$NAME -VOLUME /opt/$NAME/data -#USER $NAME +USER $NAME CMD ["python", "main.py"] diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index d9e4b314..cc685c03 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -8,6 +8,8 @@ import pandas as pd import cartoframes +LOG_LEVEL = logging.INFO + # ODBC Connection details -- these can be pulled out into an odbc.ini file ODBC_SOURCE_URL = 'vps348928.ovh.net' ODBC_PORT = '5432' @@ -19,46 +21,92 @@ cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}', ODBC_SOURCE_URL, ODBC_PORT, ODBC_DATABASE, ODBC_USER, ODBC_PASSWORD) # Carto Connection details -CARTO_USER = os.environ.get('carto_user') -CARTO_PASSWORD = os.environ.get('carto_password') +CARTO_USER = os.environ.get('CARTO_WRI_RW_USER') +CARTO_PASSWORD = os.environ.get('CARTO_WRI_RW_KEY') + +# Flow control +DOWNLOAD = True +# IN CASE RUN INTO TQDM PROBLEMS, refer to: https://github.com/tqdm/tqdm/issues/481 def main(): - logging.basicConfig(stream=sys.stderr, level=logging.INFO) + logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) logging.info('STARTING') + ### # Initialize pyodbc + ### + logging.info('Connection string: {}'.format(cnxnstr)) cnxn = pyodbc.connect(cnxnstr, autocommit=True) cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8') cnxn.setencoding(encoding='utf-8') cursor = cnxn.cursor() - # Fetch data - logging.info("DEMO - run query for countries table to prove this works") - before = datetime.now() - countries = pd.DataFrame(cursor.execute('SELECT * FROM Country').fetchall()) - logging.info('Shape of df is: {}'.format(countries.shape)) - after = datetime.now() - logging.info("Countries query takes {}".format(after-before)) - - logging.info("PROCESS THE meat 
and POTATOES") - before = datetime.now() - flowmfa = pd.DataFrame(cursor.execute('SELECT * FROM FlowMFA').fetchall()) - logging.info('Shape of df is: {}'.format(flowmfa.shape)) - after = datetime.now() - logging.info("FlowMFA query takes {}".format(after-before)) - - before = datetime.now() - flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall()) - logging.info('Shape of df is: {}'.format(flowdetailed.shape)) - after = datetime.now() - logging.info("FlowDetailed query takes {}".format(after-before)) + # For debugging purposes - there are times when the tqdm package throws an error + # This flow control allows for testing the upload process specifically + if DOWNLOAD: + + ### + # Fetch data + ### + + logging.info("DEMO - run query for countries table to prove this works") + + before = datetime.now() + countries = pd.DataFrame.from_records(cursor.execute('SELECT * FROM Country').fetchall()) + logging.info('Shape of df is: {}'.format(countries.shape)) + after = datetime.now() + logging.info("Countries query takes {}".format(after-before)) + countries.to_csv('data/countries.csv') + + logging.info("PROCESS THE meat and POTATOES - can take some time depending on internet connection speed") + + before = datetime.now() + logging.info("Start time for FlowMFA: {}".format(before)) + flowmfa = pd.DataFrame.from_records(cursor.execute('SELECT * FROM FlowMFA').fetchall()) + logging.info('Shape of df is: {}'.format(flowmfa.shape)) + after = datetime.now() + logging.info("FlowMFA query takes {}".format(after-before)) + + flowmfa.columns = ['index', 'isoalpha3', 'flow', 'mfa13', 'mfa4', 'year', 'amount'] + flowmfa.drop('index', inplace=True) + flowmfa.to_csv('data/flowmfa.csv') + # before = datetime.now() + # flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall()) + # logging.info('Shape of df is: {}'.format(flowdetailed.shape)) + # after = datetime.now() + # logging.info("FlowDetailed query takes 
{}".format(after-before)) + # flowdetailed.columns = [???] + # flowdetailed.to_csv('data/flowdetailed.csv') + + else: + + logging.info('Attempting to load tables from docker volume') + + try: + flowmfa = pd.read_csv('data/flowmfa.csv') + except: + logging.warning('flowmfa table not already available') + + try: + flowdetailed = pd.read_csv('data/flowdetailed.csv') + except: + logging.warning('flowdetaild table not already available') + + + ### # Authenticate to carto and upload data + ### + cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER), api_key=CARTO_PASSWORD) - cc.write(flowmfa, 'com_009_flowmfa', overwrite=True) - cc.write(flowdetailed, 'com_009_flowdetailed', overwrite=True) + ### + # Upload data + ### + + cc.write(flowmfa, 'com_009_flowmfa_autoupdate', overwrite=True) + # cc.write(flowdetailed, 'com_009_flowdetailed', overwrite=True) logging.info('SUCCESS') diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh index 440cea56..cda53390 100755 --- a/com_009_material_flow_databases/start.sh +++ b/com_009_material_flow_databases/start.sh @@ -5,11 +5,16 @@ NAME=$(basename $(pwd)) LOG=${LOG:-udp://localhost} docker build -t $NAME --build-arg NAME=$NAME . 
-docker run -it --log-driver=syslog \ +docker run -it -v data:/opt/$NAME/data \ + --log-driver=syslog \ --log-opt syslog-address=$LOG \ --log-opt tag=$NAME \ --env-file .env \ --rm $NAME \ - python main.py + /bin/bash - #/bin/bash + #python main.py + + + + # From 3bdc71527b0d0f6921993fd4b44003ee605c5837 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Mon, 7 May 2018 13:20:32 +0800 Subject: [PATCH 09/10] clean up start.sh, move volume external mount to a comment below --- com_009_material_flow_databases/start.sh | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh index cda53390..4cda42f6 100755 --- a/com_009_material_flow_databases/start.sh +++ b/com_009_material_flow_databases/start.sh @@ -5,16 +5,13 @@ NAME=$(basename $(pwd)) LOG=${LOG:-udp://localhost} docker build -t $NAME --build-arg NAME=$NAME . -docker run -it -v data:/opt/$NAME/data \ +docker run -it \ --log-driver=syslog \ --log-opt syslog-address=$LOG \ --log-opt tag=$NAME \ --env-file .env \ --rm $NAME \ - /bin/bash + python main.py - #python main.py - - - - # + #/bin/bash + #-v data:/opt/$NAME/data \ From bf724298117dbb312007c244dc851da559832498 Mon Sep 17 00:00:00 2001 From: nsuberi Date: Tue, 29 May 2018 09:42:28 -0400 Subject: [PATCH 10/10] upload data so that amount column is numeric --- com_009_material_flow_databases/Dockerfile | 4 ++-- com_009_material_flow_databases/contents/src/__init__.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile index 56f76893..baf15e45 100644 --- a/com_009_material_flow_databases/Dockerfile +++ b/com_009_material_flow_databases/Dockerfile @@ -26,7 +26,7 @@ RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini # Restrict permissions RUN useradd -r $NAME -RUN chown -R $NAME:$NAME /opt/$NAME -USER $NAME +RUN chown -R $NAME:$NAME . 
+#USER $NAME CMD ["python", "main.py"] diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py index cc685c03..7eca7326 100644 --- a/com_009_material_flow_databases/contents/src/__init__.py +++ b/com_009_material_flow_databases/contents/src/__init__.py @@ -68,8 +68,10 @@ def main(): after = datetime.now() logging.info("FlowMFA query takes {}".format(after-before)) + flowmfa.columns = ['index', 'isoalpha3', 'flow', 'mfa13', 'mfa4', 'year', 'amount'] - flowmfa.drop('index', inplace=True) + flowmfa.drop('index', axis=1, inplace=True) + flowmfa['amount'] = flowmfa['amount'].astype(float) flowmfa.to_csv('data/flowmfa.csv') # before = datetime.now()