Merge pull request #10 from histogrammar/use-new-language-independent-spec

jpivarski · web-flow · commit d5a6c7856f5c · 2016-08-26T22:27:11.000-05:00
Use new language independent spec
diff --git a/docs/conf.py b/docs/conf.py
@@ -47,9 +47,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = "0.9-prerelease"
+version = "1.0.0"
 # The full version, including alpha/beta/rc tags.
-release = "0.9-prerelease"
+release = "1.0.0"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/histogrammar/primitives/bag.py b/histogrammar/primitives/bag.py
@@ -26,7 +26,7 @@ class Bag(Factory, Container):
 
     A bag is the appropriate data type for scatter plots: a container that collects raw values, maintaining multiplicity but not order. (A "bag" is also known as a "multiset.") Conceptually, it is a mapping from distinct raw values to the number of observations: when two instances of the same raw value are observed, one key is stored and their weights add.
 
-    Although the user-defined function may return scalar numbers, fixed-dimension vectors of numbers, or categorical strings, it may not mix types. Different Bag primitives in an analysis tree may collect different types.
+    Although the user-defined function may return scalar numbers, fixed-dimension vectors of numbers, or categorical strings, it may not mix range types. For the purposes of Label and Index (which can only collect aggregators of a single type), bags with different ranges are different types.
     """
 
     @staticmethod
@@ -318,6 +318,9 @@ def __eq__(self, other):
                     if isinstance(v1i, numbers.Real) and isinstance(v2i, numbers.Real):
                         if not numeq(v1i, v2i):
                             return False
+                    elif isinstance(v1i, basestring) and isinstance(v2i, basestring):
+                        if v1i != v2i:
+                            return False
                     else:
                         return False
             else:
diff --git a/histogrammar/primitives/collection.py b/histogrammar/primitives/collection.py
@@ -256,6 +256,10 @@ def __init__(self, **pairs):
         contentType = list(pairs.values())[0].name
         if any(x.name != contentType for x in pairs.values()):
             raise ContainerException("all Label values must have the same type")
+        if contentType == "Bag":
+            rangeType = list(pairs.values())[0].range
+            if any(x.range != rangeType for x in pairs.values()):
+                raise ContainerException("all Label values must have the same type")
 
         self.entries = 0.0
         self.pairs = pairs
@@ -631,6 +635,10 @@ def __init__(self, *values):
         contentType = values[0].name
         if any(x.name != contentType for x in values):
             raise ValueError("all Index values must have the same type")
+        if contentType == "Bag":
+            rangeType = values[0].range
+            if any(x.range != rangeType for x in values):
+                raise ValueError("all Index values must have the same type")
 
         self.entries = 0.0
         self.values = values
diff --git a/histogrammar/util.py b/histogrammar/util.py
@@ -140,7 +140,9 @@ def floatOrNan(x):
 
 def floatToJson(x):
     """Custom rule for converting non-finite numbers to JSON as quoted strings: ``"inf"``, ``"-inf"``, and ``"nan"``. This avoids Python's bad habit of putting literal ``Infinity``, ``-Infinity``, and ``NaN`` in the JSON (without quotes)."""
-    if math.isnan(x):
+    if x in ("nan", "inf", "-inf"):
+        return x
+    elif math.isnan(x):
         return "nan"
     elif math.isinf(x) and x > 0.0:
         return "inf"
@@ -222,9 +224,10 @@ def function(datum):
                     # fill the namespace with math.* functions
                     context.update(math.__dict__)
 
-                    # if you have Numpy, override the namespace with numpy.* functions
+                    # if you have Numpy, include numpy.* functions
                     if numpy is not None:
-                        context.update(numpy.__dict__)
+                        context["numpy"] = numpy
+                        context["np"] = numpy
 
                     # if the datum is a dict, override the namespace with its dict keys
                     if isinstance(datum, dict):                # if it's a dict
@@ -244,13 +247,18 @@ def function(datum):
                         except AttributeError:
                             v, = varname                       # otherwise, use the one and only variable
                             if v is None:                      # as the object (only discover it once)
-                                try:
-                                    v, = set(c.co_names) - set(context.keys())
-                                except ValueError:
+                                v = set(c.co_names) - set(context.keys())
+                                if len(v) > 1:
                                     raise NameError("more than one unrecognized variable names in single-argument function: {0}".format(set(c.co_names) - set(context.keys())))
+                                elif len(v) == 0:
+                                    v = None
+                                else:
+                                    v = list(v)[0]
+
                                 varname[0] = v
 
-                            context.update({v: datum})
+                            if v is not None:
+                                context.update({v: datum})
 
                     return eval(c, context)
 
diff --git a/histogrammar/version.py b/histogrammar/version.py
@@ -16,7 +16,7 @@
 
 import re
 
-__version__ = "0.9-prerelease"
+__version__ = "1.0.0"
 
 version = __version__
 
diff --git a/test/testbasic.py b/test/testbasic.py
@@ -21,7 +21,7 @@
 
 from histogrammar import *
 
-class TestOriginal(unittest.TestCase):
+class TestBasic(unittest.TestCase):
     simple = [3.4, 2.2, -1.8, 0.0, 7.3, -4.7, 1.6, 0.0, -3.0, -1.7]
 
     class Struct(object):
diff --git a/test/testspec.py b/test/testspec.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+# Copyright 2016 DIANA-HEP
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import math
+import sys
+import unittest
+import urllib2
+
+from histogrammar import *
+import histogrammar.version
+
+tolerance = 1e-12  # one value used for both util tolerances below; presumably read by the library's approximate-equality checks -- confirm in histogrammar.util
+util.relativeTolerance = tolerance
+util.absoluteTolerance = tolerance
+
+class TestSpec(unittest.TestCase):  # checks this implementation against reference results downloaded from histogrammar.org
+    def compare(self, x, y, name):  # x: JSON produced here; y: JSON from the specification; name: label printed on mismatch
+        if Factory.fromJson(x) != Factory.fromJson(y):
+            sys.stderr.write("                                          FAILED " + name + "\n")
+            sys.stderr.write("                  PYTHON                           |                   SPECIFICATION\n")
+            left = json.dumps(x, sort_keys=True, indent=2)
+            right = json.dumps(y, sort_keys=True, indent=2)
+            for leftline, rightline in zip(left.split("\n"), right.split("\n")):  # NOTE: zip stops at the shorter dump, so extra trailing lines are not printed
+                if leftline != rightline:
+                    sys.stderr.write("{0:50s} > {1}\n".format(leftline, rightline))  # ">" marks a differing line
+                else:
+                    sys.stderr.write("{0:50s} | {1}\n".format(leftline, rightline))  # "|" marks an identical line
+            self.assertEqual(Factory.fromJson(x), Factory.fromJson(y))  # assert only after the side-by-side dump has been written
+        
+    def runTest(self):  # downloads test data and expected results, then checks every expression with and without quantity names
+        sys.stdout.write("Downloading expected results, generated by specification {0}...\n".format(histogrammar.version.specification))
+        try:
+            testdata = json.load(urllib2.urlopen("http://histogrammar.org/test/{0}/test-data.json".format(histogrammar.version.specification)))
+        except Exception as err:
+            sys.stdout.write("could not download http://histogrammar.org/test/{0}/test-data.json\nbecause of {1}: {2}\n".format(histogrammar.version.specification, err.__class__.__name__, str(err)))
+            return  # NOTE: a failed download ends the test here without running any assertion
+        try:
+            testresults = json.load(urllib2.urlopen("http://histogrammar.org/test/{0}/test-results.json".format(histogrammar.version.specification)))
+        except Exception as err:
+            sys.stdout.write("could not download http://histogrammar.org/test/{0}/test-results.json\nbecause of {1}: {2}\n\n".format(histogrammar.version.specification, err.__class__.__name__, str(err)))
+            return  # NOTE: same as above -- no assertions run when the expected results are unavailable
+
+        for x in testdata:
+            for k, v in x.items():
+                if k != "strings" and v in ("nan", "inf", "-inf"):  # quoted non-finite numbers become floats, except under the "strings" key
+                    x[k] = float(v)
+        
+        def stripNames(x):  # recursively clears quantity names so output can be compared with the "-anonymous" expectations
+            if hasattr(x, "quantity"):
+                x.quantity.name = None
+            elif hasattr(x, "quantityName"):
+                x.quantityName = None
+            for xi in x.children:
+                stripNames(xi)
+
+        for testresult in testresults:
+            sys.stderr.write(testresult["expr"] + "\n")  # echo the expression under test
+
+            zero = testresult["zero-named"]  # expected JSON before any fill
+            one = testresult["one-named"]  # expected JSON after one pass over testdata
+            two = testresult["two-named"]  # expected JSON after two passes, or the sum of two single passes
+
+            h1 = eval(testresult["expr"])  # NOTE(security): evaluates an expression string taken from JSON downloaded over plain HTTP
+            h2 = eval(testresult["expr"])
+
+            self.compare(h1.toJson(), zero, "NAMED ZERO")
+            self.compare((h1 + h1).toJson(), zero, "NAMED ZERO + ZERO")
+            self.compare(h1.zero().toJson(), zero, "NAMED ZERO.zero()")
+
+            for x in testdata:
+                h1.fill(x)
+                h2.fill(x)
+            self.compare(h1.toJson(), one, "NAMED ONE")
+            self.compare(h1.zero().toJson(), zero, "NAMED ONE.zero()")
+            self.compare((h1 + h1.zero()).toJson(), one, "NAMED ONE + ZERO")
+            self.compare((h1.zero() + h1).toJson(), one, "NAMED ZERO + ONE")
+
+            self.compare((h1 + h2).toJson(), two, "NAMED TWO VIA PLUS")
+
+            for x in testdata:
+                h1.fill(x)  # second pass over the same data
+            self.compare(h1.toJson(), two, "NAMED TWO VIA FILL")
+
+            zero = testresult["zero-anonymous"]  # same three expectations, for aggregators whose names were stripped
+            one = testresult["one-anonymous"]
+            two = testresult["two-anonymous"]
+
+            h1 = eval(testresult["expr"])  # NOTE(security): same eval of downloaded text as above
+            stripNames(h1)
+            h2 = eval(testresult["expr"])
+            stripNames(h2)
+
+            self.compare(h1.toJson(), zero, "ANONYMOUS ZERO")
+            self.compare((h1 + h1).toJson(), zero, "ANONYMOUS ZERO + ZERO")
+            self.compare(h1.zero().toJson(), zero, "ANONYMOUS ZERO.zero()")
+
+            for x in testdata:
+                h1.fill(x)
+                h2.fill(x)
+            self.compare(h1.toJson(), one, "ANONYMOUS ONE")
+            self.compare(h1.zero().toJson(), zero, "ANONYMOUS ONE.zero()")
+            self.compare((h1 + h1.zero()).toJson(), one, "ANONYMOUS ONE + ZERO")
+            self.compare((h1.zero() + h1).toJson(), one, "ANONYMOUS ZERO + ONE")
+
+            self.compare((h1 + h2).toJson(), two, "ANONYMOUS TWO VIA PLUS")
+
+            for x in testdata:
+                h1.fill(x)  # second pass over the same data
+            self.compare(h1.toJson(), two, "ANONYMOUS TWO VIA FILL")