diff --git a/com_009_material_flow_databases/.gitignore b/com_009_material_flow_databases/.gitignore
new file mode 100644
index 00000000..0472744e
--- /dev/null
+++ b/com_009_material_flow_databases/.gitignore
@@ -0,0 +1,6 @@
+*.env
+*#
+*.py[c|o]
+*.DS_Store
+data/*
+credentials.json
\ No newline at end of file
diff --git a/com_009_material_flow_databases/.sampleenv b/com_009_material_flow_databases/.sampleenv
new file mode 100644
index 00000000..e3f6829d
--- /dev/null
+++ b/com_009_material_flow_databases/.sampleenv
@@ -0,0 +1,4 @@
+mfa_db_password=<>
+
+CARTO_WRI_RW_USER=<>
+CARTO_WRI_RW_KEY=<>
diff --git a/com_009_material_flow_databases/Dockerfile b/com_009_material_flow_databases/Dockerfile
new file mode 100644
index 00000000..baf15e45
--- /dev/null
+++ b/com_009_material_flow_databases/Dockerfile
@@ -0,0 +1,32 @@
+FROM continuumio/miniconda3
+MAINTAINER Nathan Suberi
+
+# Provide name of container
+ARG NAME
+
+# Install necessary libraries
+RUN apt-get update -y && apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc libpq-dev
+RUN conda update -n base conda && conda install pyodbc pandas
+RUN pip install cartoframes && pip uninstall -y tqdm && pip install tqdm==4.20.0
+
+# Configure postgresql drivers
+RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz
+RUN gunzip psqlodbc-09.02.0100.tar.gz
+RUN tar xvf psqlodbc-09.02.0100.tar
+RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install
+
+# Copy the application folder inside the container
+RUN mkdir -p /opt/$NAME/data
+VOLUME /opt/$NAME/data
+WORKDIR /opt/$NAME/
+COPY contents/ .
+
+# Set up ODBC driver info
+RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini
+
+# Restrict permissions
+RUN useradd -r $NAME
+RUN chown -R $NAME:$NAME .
+#USER $NAME
+
+CMD ["python", "main.py"]
diff --git a/com_009_material_flow_databases/Dockerfile_without_conda b/com_009_material_flow_databases/Dockerfile_without_conda
new file mode 100644
index 00000000..52b7f055
--- /dev/null
+++ b/com_009_material_flow_databases/Dockerfile_without_conda
@@ -0,0 +1,41 @@
+FROM python:3.6
+MAINTAINER Nathan Suberi
+
+RUN apt-get update -y
+
+# Install core libraries for ODBC connection
+RUN apt-get install -y build-essential unixodbc-dev unixodbc-bin unixodbc
+
+# https://github.com/mkleehammer/pyodbc
+RUN pip install --upgrade pip && pip install pyodbc
+
+## Some attempts at manually installing the drivers fail
+## https://blog.csdn.net/jollypigclub/article/details/46490541
+## https://www.cnblogs.com/he11o-liu/p/7503232.html
+## https://odbc.postgresql.org/docs/unix-compilation.html
+
+RUN wget https://ftp.postgresql.org/pub/odbc/versions/src/psqlodbc-09.02.0100.tar.gz
+RUN gunzip psqlodbc-09.02.0100.tar.gz
+RUN tar xvf psqlodbc-09.02.0100.tar
+RUN cd psqlodbc-09.02.0100 && sh ./configure --with-unixodbc && make && make install
+
+# set name
+ARG NAME=nrt-script
+ENV NAME ${NAME}
+
+# copy the application folder inside the container
+RUN mkdir -p /opt/$NAME/data
+WORKDIR /opt/$NAME/
+COPY contents/ .
+
+# Set up ODBC driver info
+RUN mv /opt/$NAME/odbcinst.ini /etc/odbcinst.ini
+RUN cat /etc/odbcinst.ini
+RUN odbcinst -j
+
+RUN useradd -r $NAME
+RUN chown -R $NAME:$NAME /opt/$NAME
+VOLUME /opt/$NAME/data
+#USER $NAME
+
+CMD ["python", "main.py"]
diff --git a/com_009_material_flow_databases/README.md b/com_009_material_flow_databases/README.md
new file mode 100644
index 00000000..6af37d79
--- /dev/null
+++ b/com_009_material_flow_databases/README.md
@@ -0,0 +1,22 @@
+# com_009_material_flow_databases
+
+Script that copies the Material Flow Analysis (MFA) database tables to Carto.
+
+# Run
+
+Copy `.sampleenv` to `.env` and enter account credentials.
+
+`./start.sh` Build docker and run once.
+
+# Modify
+
+`start.sh` Edit script name / Docker image name.
+
+`contents/` Copied into container.
+
+`contents/src/__init__.py` Main application script.
+
+`contents/odbcinst.ini` ODBC driver configuration, installed to `/etc/odbcinst.ini`.
+
+`time.cron` Edit cron frequency.
+
diff --git a/com_009_material_flow_databases/contents/main.py b/com_009_material_flow_databases/contents/main.py
new file mode 100644
index 00000000..eecdfb9a
--- /dev/null
+++ b/com_009_material_flow_databases/contents/main.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+if __name__ == '__main__':
+    import src
+    src.main()
diff --git a/com_009_material_flow_databases/contents/odbcinst.ini b/com_009_material_flow_databases/contents/odbcinst.ini
new file mode 100644
index 00000000..4eeaf09d
--- /dev/null
+++ b/com_009_material_flow_databases/contents/odbcinst.ini
@@ -0,0 +1,7 @@
+[PostgreSQL Unicode]
+Description = PostgreSQL ODBC driver (Unicode version)
+Driver = /usr/local/lib/psqlodbcw.so
+Setup = libodbcpsqlS.so
+Debug = 0
+CommLog = 1
+UsageCount = 2
diff --git a/com_009_material_flow_databases/contents/src/__init__.py b/com_009_material_flow_databases/contents/src/__init__.py
new file mode 100644
index 00000000..7eca7326
--- /dev/null
+++ b/com_009_material_flow_databases/contents/src/__init__.py
@@ -0,0 +1,119 @@
+"""Mirror the Material Flow Analysis (MFA) Postgres database to Carto.
+
+Reads tables from the MFA source database over ODBC, caches them as CSVs
+under data/, and uploads them to a Carto account via cartoframes.
+"""
+import os
+import sys
+import logging
+from datetime import datetime
+
+import pyodbc
+import pandas as pd
+import cartoframes
+
+LOG_LEVEL = logging.INFO
+
+# ODBC Connection details -- these can be pulled out into an odbc.ini file
+ODBC_SOURCE_URL = 'vps348928.ovh.net'
+ODBC_PORT = '5432'
+ODBC_DATABASE = 'mfa'
+ODBC_USER = 'mfa'
+ODBC_PASSWORD = os.environ.get('mfa_db_password')
+
+CONNECTION_STRING = 'DRIVER={};SERVER={};PORT={};DATABASE={};UID={};PWD={}'
+cnxnstr = CONNECTION_STRING.format('{PostgreSQL Unicode}', ODBC_SOURCE_URL, ODBC_PORT, ODBC_DATABASE, ODBC_USER, ODBC_PASSWORD)
+
+# Carto Connection details -- names must match the variables set in .env (see .sampleenv)
+CARTO_USER = os.environ.get('CARTO_WRI_RW_USER')
+CARTO_PASSWORD = os.environ.get('CARTO_WRI_RW_KEY')
+
+# Flow control
+DOWNLOAD = True
+# IN CASE RUN INTO TQDM PROBLEMS, refer to: https://github.com/tqdm/tqdm/issues/481
+
+def main():
+    """Fetch the MFA tables (or load cached CSVs) and upload them to Carto."""
+    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
+    logging.info('STARTING')
+
+    ###
+    # Initialize pyodbc
+    ###
+
+    logging.info('Connection string: {}'.format(cnxnstr))
+    cnxn = pyodbc.connect(cnxnstr, autocommit=True)
+    cnxn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
+    cnxn.setencoding(encoding='utf-8')
+    cursor = cnxn.cursor()
+
+    # For debugging purposes - there are sometimes when the tqdm package throws an error
+    # This flow control allows for testing the upload process specifically
+    if DOWNLOAD:
+
+        ###
+        # Fetch data
+        ###
+
+        logging.info("DEMO - run query for countries table to prove this works")
+
+        before = datetime.now()
+        countries = pd.DataFrame.from_records(cursor.execute('SELECT * FROM Country').fetchall())
+        logging.info('Shape of df is: {}'.format(countries.shape))
+        after = datetime.now()
+        logging.info("Countries query takes {}".format(after-before))
+        countries.to_csv('data/countries.csv')
+
+        logging.info("PROCESS THE meat and POTATOES - can take some time depending on internet connection speed")
+
+        before = datetime.now()
+        logging.info("Start time for FlowMFA: {}".format(before))
+        flowmfa = pd.DataFrame.from_records(cursor.execute('SELECT * FROM FlowMFA').fetchall())
+        logging.info('Shape of df is: {}'.format(flowmfa.shape))
+        after = datetime.now()
+        logging.info("FlowMFA query takes {}".format(after-before))
+
+
+        flowmfa.columns = ['index', 'isoalpha3', 'flow', 'mfa13', 'mfa4', 'year', 'amount']
+        flowmfa.drop('index', axis=1, inplace=True)
+        flowmfa['amount'] = flowmfa['amount'].astype(float)
+        flowmfa.to_csv('data/flowmfa.csv')
+
+        # before = datetime.now()
+        # flowdetailed = pd.DataFrame(cursor.execute('SELECT * FROM FlowDetailed').fetchall())
+        # logging.info('Shape of df is: {}'.format(flowdetailed.shape))
+        # after = datetime.now()
+        # logging.info("FlowDetailed query takes {}".format(after-before))
+        # flowdetailed.columns = [???]
+        # flowdetailed.to_csv('data/flowdetailed.csv')
+
+    else:
+
+        logging.info('Attempting to load tables from docker volume')
+
+        try:
+            flowmfa = pd.read_csv('data/flowmfa.csv')
+        except Exception:
+            logging.warning('flowmfa table not already available')
+
+        try:
+            flowdetailed = pd.read_csv('data/flowdetailed.csv')
+        except Exception:
+            logging.warning('flowdetailed table not already available')
+
+
+    ###
+    # Authenticate to carto and upload data
+    ###
+
+    cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER),
+                                  api_key=CARTO_PASSWORD)
+
+    ###
+    # Upload data
+    ###
+
+    cc.write(flowmfa, 'com_009_flowmfa_autoupdate', overwrite=True)
+    # cc.write(flowdetailed, 'com_009_flowdetailed', overwrite=True)
+
+    logging.info('SUCCESS')
diff --git a/com_009_material_flow_databases/main.py b/com_009_material_flow_databases/main.py
new file mode 100644
index 00000000..eecdfb9a
--- /dev/null
+++ b/com_009_material_flow_databases/main.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+if __name__ == '__main__':
+    import src
+    src.main()
diff --git a/com_009_material_flow_databases/start.sh b/com_009_material_flow_databases/start.sh
new file mode 100755
index 00000000..4cda42f6
--- /dev/null
+++ b/com_009_material_flow_databases/start.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+#Change the NAME variable with the name of your script
+NAME=$(basename $(pwd))
+LOG=${LOG:-udp://localhost}
+
+docker build -t $NAME --build-arg NAME=$NAME .
+docker run -it \
+    --log-driver=syslog \
+    --log-opt syslog-address=$LOG \
+    --log-opt tag=$NAME \
+    --env-file .env \
+    --rm $NAME \
+    python main.py
+
+    #/bin/bash
+    #-v data:/opt/$NAME/data \
diff --git a/com_009_material_flow_databases/time.cron b/com_009_material_flow_databases/time.cron
new file mode 100644
index 00000000..e72b2d58
--- /dev/null
+++ b/com_009_material_flow_databases/time.cron
@@ -0,0 +1 @@
+0 0 1 1 *