Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.pyc
*.pyo
.DS_Store
Empty file added cmf/__init__.py
Empty file.
100 changes: 100 additions & 0 deletions cmf/subprocess_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright (c) 2013 Cloudera, Inc. All rights reserved.

import fcntl
import logging
import os
import signal
import subprocess
import time

# from cmf.throttling_logger import ThrottlingLogger

# Module-level logger; throttled logging is disabled until ThrottlingLogger ships.
LOG = logging.getLogger(__name__)
# THROTTLED_LOG = ThrottlingLogger(LOG)

def _non_block_read(output):
    """
    Sets a file descriptor to non-blocking mode and
    returns the result of a read from the stream.
    Restores the descriptor to its original status.

    Returns '' when no data is available instead of blocking:
    Python 2 file objects raise IOError(EAGAIN) in that case,
    while Python 3 buffered streams return None.
    """
    fd = output.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    try:
        try:
            data = output.read()
            # Python 3 returns None from a non-blocking read with no data.
            return data if data is not None else ''
        except (IOError, OSError):
            # No data ready (EAGAIN) or the pipe is gone; report "nothing read".
            return ''
    finally:
        fcntl.fcntl(fd, fcntl.F_SETFL, fl)

def _join_chunks(chunks):
    """Join captured output chunks, preserving the str-vs-bytes type.

    On Python 2 both are str; on Python 3 pipe reads yield bytes, so
    joining with '' would raise TypeError.
    """
    if chunks and isinstance(chunks[0], bytes) and not isinstance(chunks[0], str):
        return b''.join(chunks)
    return ''.join(chunks)

def subprocess_with_timeout(args, timeout,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            env=None, cwd=None):
    """
    Calls subprocess.Popen() and blocks for it to finish, timing out if
    'timeout' seconds elapse.

    args    -- argv list for the child process
    timeout -- seconds to wait before killing the child
    stdout  -- passed to Popen; output is captured only when subprocess.PIPE
    stderr  -- passed to Popen; output is captured only when subprocess.PIPE
    env     -- optional environment dict for the child
    cwd     -- optional working directory for the child

    Returns (returncode, captured_stdout, captured_stderr).
    Raises Exception when the timeout elapses; the child is sent SIGTERM,
    then SIGKILL if it is still alive one second later.
    """
    kwargs = {
        'args': args,
        'stdout': stdout,
        'stderr': stderr,
        'close_fds': True,
    }
    if env is not None:
        kwargs['env'] = env
    if cwd is not None:
        kwargs['cwd'] = cwd
    p = subprocess.Popen(**kwargs)
    t_begin = time.time()
    seconds_passed = 0
    # Accumulate chunks in lists and join once at the end: repeated
    # string += is quadratic and breaks on Python 3 bytes output.
    stdout_chunks = []
    stderr_chunks = []

    # We spin here (with a 0.1 second granularity)
    # to find when the process has died and to
    # pump its output from it. If we didn't
    # handle the output, buffered output could
    # cause it to block unnecessarily and not
    # finish.
    #
    # Even if we didn't care about output,
    # using signals (a la solutions at http://stackoverflow.com/questions/1191374/subprocess-with-timeout)
    # is problematic in a context with threads and using
    # select() on the output may be problematic too (what happens
    # when it's closed?). I've never tried using select() on
    # the /proc/pid directory.
    while p.poll() is None and seconds_passed < timeout:
        # p.std* blocks on read(), which messes up the timeout timer.
        if stdout == subprocess.PIPE:
            chunk = _non_block_read(p.stdout)
            if chunk:
                stdout_chunks.append(chunk)
        if stderr == subprocess.PIPE:
            chunk = _non_block_read(p.stderr)
            if chunk:
                stderr_chunks.append(chunk)
        time.sleep(0.1)
        seconds_passed = time.time() - t_begin

    if seconds_passed >= timeout:
        try:
            if stdout == subprocess.PIPE:
                p.stdout.close()
            if stderr == subprocess.PIPE:
                p.stderr.close()
            # use os.kill instead of p.terminate for python 2.4 compatibility
            os.kill(p.pid, signal.SIGTERM)
        except Exception:
            # Best-effort kill; log and fall through to the SIGKILL check.
            LOG.exception("Kill subprocess exception with args %s" % args)

        # Make sure it's really dead
        if p.poll() is None:
            time.sleep(1.0)
            os.kill(p.pid, signal.SIGKILL)

        # LOG.exception outside an 'except' block would log a bogus
        # "NoneType: None" traceback, so use LOG.error here.
        LOG.error("Timeout with args %s" % args)
        raise Exception("timeout with args %s" % args)

    # Drain whatever output remained buffered when the process exited.
    if stdout == subprocess.PIPE:
        chunk = _non_block_read(p.stdout)
        if chunk:
            stdout_chunks.append(chunk)
    if stderr == subprocess.PIPE:
        chunk = _non_block_read(p.stderr)
        if chunk:
            stderr_chunks.append(chunk)
    return p.returncode, _join_chunks(stdout_chunks), _join_chunks(stderr_chunks)
63 changes: 54 additions & 9 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,69 @@
import json
import logging
import sys
import os

# Configure root logging once at import time: INFO-level messages to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s')

# Environment variable that may name the configuration file when no
# command-line argument is given.
PROVISIONATOR_CONFIG_ENV = 'PROVISIONATOR_CONFIG'
LOG = logging.getLogger(__name__)

def read_json_config(configfile):
    """Read and parse a JSON configuration file.

    configfile -- path to the JSON file.
    Returns the parsed configuration (typically a dict).
    """
    LOG.debug("Reading JSON config from %s" % configfile)
    with open(configfile, 'r') as f:
        config = json.load(f)
    LOG.debug("JSON Config: %s" % json.dumps(config, indent=2))
    return config

def read_config(fmt="json"):
    """Locate and read the provisionator configuration.

    The file path is taken from the first command-line argument when
    present, otherwise from the PROVISIONATOR_CONFIG environment
    variable. Exits the process when neither is set.

    fmt -- configuration format; only "json" is supported. For any
           other value an error is logged and None is returned.
    """
    if len(sys.argv) > 1:
        configfile = sys.argv[1]
    elif PROVISIONATOR_CONFIG_ENV in os.environ:
        configfile = os.environ.get(PROVISIONATOR_CONFIG_ENV)
    else:
        LOG.fatal("No configuration file specified")
        sys.exit(-1)

    if fmt == "json":
        return read_json_config(configfile)
    else:
        LOG.error("Config files in %s format not supported" % fmt)

def get_value(config, key, service_type=None, role_type=None):
    """
    Returns the value for the given key, or None if the key is not found

    config -- parsed configuration dict (see read_config)
    key -- Property to search for
    service_type -- Service type to search (e.g. 'HDFS')
                    Pass None to search all service types (default None)
    role_type -- Role type to search (e.g. 'DATANODE')
                 Pass None to search all role types (default None)
    """
    for service in config['cluster']['services']:
        # None acts as a wildcard; compare with 'is None', not '== None'.
        if service_type is not None and service_type != service['type']:
            continue
        for role in service['roles']:
            if role_type is not None and role_type != role['type']:
                continue
            # Membership test (not .get) so a stored falsy value is still found.
            if 'config' in role and key in role['config']:
                LOG.debug("Found property: '%s : %s'",
                          key, role['config'][key])
                return role['config'][key]
    LOG.debug("Did not find property '%s' in configuration with filters: "
              "service_type = '%s', role_type = '%s'", key, service_type, role_type)
    return None

def get_roles(config, hostname):
    """Return the list of role types assigned to the given hostname."""
    return [role['type']
            for service in config['cluster']['services']
            for role in service['roles']
            if hostname in role['hosts']]

if __name__ == "__main__":
    # Allow running this module directly to validate a configuration file;
    # read_config() resolves the file from argv or the environment itself.
    read_config()

Empty file added prereq/__init__.py
Empty file.
101 changes: 101 additions & 0 deletions prereq/hw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python

from config import get_roles
from config import get_value
from config import read_config
import os
import logging
import socket

LOG = logging.getLogger(__name__)
# Default location of the filesystem table on Linux hosts.
DEFAULT_FSTAB = '/etc/fstab'
# Config property holding the comma-separated DataNode data directories.
DIR_LIST_KEY = 'dfs_data_dir_list'
# Filesystems accepted for DataNode data directories.
SUPPORTED_FILESYSTEMS = ('ext3', 'ext4', 'xfs')

def main(config):
    """Run hardware prerequisite checks for this host.

    Memory is always checked; DataNode disk checks run only when this
    host carries the DATANODE role in the configuration.
    """
    check_mem()
    host_roles = get_roles(config, socket.gethostname())
    if 'DATANODE' in host_roles:
        check_dn_disks(config)

def check_mem():
    """ Check host has more than 10GB memory; logs the result. """
    # Total physical memory = page size * number of physical pages.
    mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
    LOG.debug('Memory: %s' % str(mem_bytes))

    # 10 GiB expressed explicitly instead of the magic 10737418240.
    min_mem_bytes = 10 * 1024 ** 3
    if mem_bytes < min_mem_bytes:
        LOG.error('Not enough memory: %d' % mem_bytes)
    else:
        LOG.info('Memory ok: %d' % mem_bytes)

def read_fstab(path=None):
    """Read an fstab file and return its entry lines as a list of strings.

    path -- fstab file to read; defaults to DEFAULT_FSTAB.
    Comment lines (starting with '#') and blank lines are skipped so
    callers like get_mount() do not see spurious malformed entries.
    Returns [] when the file cannot be read.
    """
    if path is None:
        path = DEFAULT_FSTAB
    devices = []
    try:
        with open(path, 'r') as f:
            for dev in f.read().splitlines():
                # Skip comments and blank lines; everything else is an entry.
                if dev.startswith('#') or not dev.strip():
                    continue
                LOG.debug("Read device from fstab: %s" % dev)
                devices.append(dev)
    except IOError:
        LOG.error("Unable to read '%s'" % path)
    return devices

def get_mount(devices, mount_point):
    """
    Get <file system type> and <options> for given mount point

    Fstab format:
    <device> <mount point> <file system type> <options> <dump> <pass num>

    devices -- list of devices from read_fstab()
    mount_point -- mount point get info for

    Returns (fstype, options), or (None, None) when not found.
    """
    for dev in devices:
        # split() (any whitespace run) rather than split(' '): fstab fields
        # may be separated by tabs or multiple spaces.
        tokens = dev.split()
        if len(tokens) == 6:
            if tokens[1] == mount_point:
                return tokens[2], tokens[3]
        else:
            # NOTE(review): fstab permits omitting the dump/pass fields;
            # such 4- or 5-field entries are reported as malformed here.
            LOG.error('Unexpected number of fields in fstab entry: %s' % dev)
    LOG.error("Mount point '%s' not defined in fstab" % mount_point)
    return None, None

def check_dn_disks(config):
    """ Check disks given in 'dfs_data_dir_list'.

    For each configured DataNode data directory, verify via fstab that it
    is formatted with a supported filesystem and mounted with 'noatime'.
    All findings are logged; nothing is returned.
    """
    LOG.debug('Checking DataNode disks')
    dir_list = get_value(config, DIR_LIST_KEY, 'HDFS', 'DATANODE')
    if not dir_list:
        LOG.error("Configuration error: "
                  "unable to find property '%s'" % DIR_LIST_KEY)
        return

    # Only read fstab once we know there are directories to check.
    devices = read_fstab()
    LOG.debug('%s: %s', DIR_LIST_KEY, dir_list)
    for disk in dir_list.split(','):
        fstype, options = get_mount(devices, disk)

        if fstype is not None:
            # Check disk has been formatted with ext3, ext4, or xfs
            LOG.info("Disk '%s' formatted with '%s'", disk, fstype)
            if fstype.lower() not in SUPPORTED_FILESYSTEMS:
                LOG.error("Disk '%s' is not mounted or has not been "
                          "formatted with ext3, ext4, or xfs" % disk)
        else:
            LOG.error("Unable to determine file system format for disk '%s'. "
                      "Check fstab" % disk)

        if options is not None:
            # Check disk has been mounted with 'noatime' option
            LOG.info("Disk '%s' mount options: '%s'", disk, options)
            if 'noatime' not in options:
                LOG.error("Disk '%s' is not mounted or has not been "
                          "mounted with 'noatime' option" % disk)
        else:
            LOG.error("Unable to determine mount options for disk '%s'. "
                      "Check fstab" % disk)

if __name__ == '__main__':
    main(read_config())

61 changes: 61 additions & 0 deletions prereq/jdk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python

import logging
import config
from cmf.subprocess_timeout import subprocess_with_timeout

LOG = logging.getLogger(__name__)
# Appended to error messages to tell the operator which versions are supported.
LOG_JDK = 'Supported minimum Oracle JDK versions in C5.5.x: 1.7.0_55 and 1.8.0_31 (1.8.0_40 is a known bad version)'

# Minimum supported update number for each supported JDK release line.
JDK = {
    '1.7.0': 55,
    '1.8.0': 31
}

def main():
    """Run all JDK prerequisite checks in order."""
    for check in (check_hotspot, check_jdk_version):
        check()

def check_hotspot():
    """
    Check for preinstalled Oracle Java
    'java -version' writes to stderr
    """
    result, stdout, stderr = subprocess_with_timeout(['java', '-version'], 10)
    LOG.debug('java -version:\n%s', stderr.rstrip())
    if not stderr.startswith('java'):
        return
    # Oracle's JVM identifies itself as "HotSpot" in the version banner.
    if 'HotSpot' in stderr:
        LOG.info('Running supported Java implementation (Oracle)')
    else:
        LOG.error("Running unsupported Java implementation. %s", LOG_JDK)

def check_jdk_version():
    """
    Check preinstalled JDK versions
    C5.5.x:
    1.7.0_55 (min)
    1.8.0_31 (min) (note: JDK 1.8.0_40 is not recommended)
    'javac -version' writes to stderr

    Logs whether the installed JDK meets the supported minimums.
    """
    args = ['javac', '-version']
    result, stdout, stderr = subprocess_with_timeout(args, 10)
    LOG.debug('javac -version: %s', stderr.rstrip())

    if stderr.startswith('javac'):
        # e.g. "javac 1.8.0_31" -> v = "1.8.0_31"
        v = stderr[6:].rstrip()
        parts = v.split('_')
        r = parts[0]
        # Guard against versions with no update suffix (e.g. "1.8.0").
        u = parts[1] if len(parts) > 1 else '0'

        if r not in JDK:
            LOG.error("Running unsupported version of JDK: '%s'. %s", v, LOG_JDK)
        elif v == '1.8.0_40':
            # BUG FIX: this used "v is '1.8.0_40'", an identity comparison
            # that is effectively always False, so the known-bad version
            # was reported as supported. Checked before the minimum-update
            # test since 40 >= 31 would otherwise mask nothing.
            LOG.error("Running known bad version of JDK: '%s'. %s", v, LOG_JDK)
        elif int(u) < JDK[r]:
            LOG.error("Running unsupported update of JDK: '%s'. %s", v, LOG_JDK)
        else:
            LOG.info("Running supported version of JDK: '%s'" % v)

if __name__ == '__main__':
    main()
Loading