Skip to content

Commit 664eedd

Browse files
committed
Rename Categorize's 'pairs' as 'bins' for better coherence with CentrallyBin, IrregularlyBin, Stack, etc.
1 parent baf38e7 commit 664eedd

File tree

3 files changed: 71 additions and 71 deletions

histogrammar/primitives/categorize.py

Lines changed: 58 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -29,30 +29,30 @@ class Categorize(Factory, Container):
2929
"""
3030

3131
@staticmethod
32-
def ed(entries, contentType, pairsAsDict=None, **pairs):
32+
def ed(entries, contentType, binsAsDict=None, **bins):
3333
"""Create a Categorize that is only capable of being added.
3434
3535
Parameters:
3636
entries (float): the number of entries.
3737
contentType (str): the value's sub-aggregator type (must be provided to determine type for the case when `bins` is empty).
38-
pairs (dict from str to :doc:`Container <histogrammar.defs.Container>`): the non-empty bin categories and their values.
38+
bins (dict from str to :doc:`Container <histogrammar.defs.Container>`): the non-empty bin categories and their values.
3939
"""
4040
if not isinstance(entries, numbers.Real) and entries not in ("nan", "inf", "-inf"):
4141
raise TypeError("entries ({0}) must be a number".format(entries))
4242
if not isinstance(contentType, basestring):
4343
raise TypeError("contentType ({0}) must be a string".format(contentType))
44-
if not all(isinstance(k, basestring) and isinstance(v, Container) for k, v in pairs.items()):
45-
raise TypeError("pairs ({0}) must be a dict from strings to Containers".format(pairs))
44+
if not all(isinstance(k, basestring) and isinstance(v, Container) for k, v in bins.items()):
45+
raise TypeError("bins ({0}) must be a dict from strings to Containers".format(bins))
4646
if entries < 0.0:
4747
raise ValueError("entries ({0}) cannot be negative".format(entries))
4848

4949
out = Categorize(None, None)
5050
out.entries = float(entries)
51-
if pairsAsDict is None:
52-
out.pairs = {}
51+
if binsAsDict is None:
52+
out.bins = {}
5353
else:
54-
out.pairs = pairsAsDict
55-
out.pairs.update(pairs)
54+
out.bins = binsAsDict
55+
out.bins.update(bins)
5656
out.contentType = contentType
5757
return out.specialize()
5858

@@ -70,55 +70,55 @@ def __init__(self, quantity, value=Count()):
7070
7171
Other Parameters:
7272
entries (float): the number of entries, initially 0.0.
73-
pairs (dict from str to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to fill with values when their `entries` become non-zero.
73+
bins (dict from str to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to fill with values when their `entries` become non-zero.
7474
"""
7575
if value is not None and not isinstance(value, Container):
7676
raise TypeError("value ({0}) must be None or a Container".format(value))
7777
self.entries = 0.0
7878
self.quantity = serializable(quantity)
7979
self.value = value
80-
self.pairs = {}
80+
self.bins = {}
8181
if value is not None:
8282
self.contentType = str(value.factory.name)
8383
super(Categorize, self).__init__()
8484
self.specialize()
8585

8686
@property
87-
def pairsMap(self):
88-
"""Input ``pairs`` as a key-value map."""
89-
return self.pairs
87+
def binsMap(self):
88+
"""Input ``bins`` as a key-value map."""
89+
return self.bins
9090

9191
@property
9292
def size(self):
93-
"""Number of ``pairs``."""
94-
return len(self.pairs)
93+
"""Number of ``bins``."""
94+
return len(self.bins)
9595

9696
@property
9797
def keys(self):
98-
"""Iterable over the keys of the ``pairs``."""
99-
return self.pairs.keys()
98+
"""Iterable over the keys of the ``bins``."""
99+
return self.bins.keys()
100100

101101
@property
102102
def values(self):
103-
"""Iterable over the values of the ``pairs``."""
104-
return list(self.pairs.values())
103+
"""Iterable over the values of the ``bins``."""
104+
return list(self.bins.values())
105105

106106
@property
107107
def keySet(self):
108-
"""Set of keys among the ``pairs``."""
109-
return set(self.pairs.keys())
108+
"""Set of keys among the ``bins``."""
109+
return set(self.bins.keys())
110110

111111
def __call__(self, x):
112112
"""Attempt to get key ``x``, throwing an exception if it does not exist."""
113-
return self.pairs[x]
113+
return self.bins[x]
114114

115115
def get(self, x):
116116
"""Attempt to get key ``x``, returning ``None`` if it does not exist."""
117-
return self.pairs.get(x)
117+
return self.bins.get(x)
118118

119119
def getOrElse(self, x, default):
120120
"""Attempt to get key ``x``, returning an alternative if it does not exist."""
121-
return self.pairs.get(x, default)
121+
return self.bins.get(x, default)
122122

123123
@inheritdoc(Container)
124124
def zero(self): return Categorize(self.quantity, self.value)
@@ -128,14 +128,14 @@ def __add__(self, other):
128128
if isinstance(other, Categorize):
129129
out = Categorize(self.quantity, self.value)
130130
out.entries = self.entries + other.entries
131-
out.pairs = {}
131+
out.bins = {}
132132
for k in self.keySet.union(other.keySet):
133-
if k in self.pairs and k in other.pairs:
134-
out.pairs[k] = self.pairs[k] + other.pairs[k]
135-
elif k in self.pairs:
136-
out.pairs[k] = self.pairs[k].copy()
133+
if k in self.bins and k in other.bins:
134+
out.bins[k] = self.bins[k] + other.bins[k]
135+
elif k in self.bins:
136+
out.bins[k] = self.bins[k].copy()
137137
else:
138-
out.pairs[k] = other.pairs[k].copy()
138+
out.bins[k] = other.bins[k].copy()
139139
return out.specialize()
140140

141141
else:
@@ -150,9 +150,9 @@ def fill(self, datum, weight=1.0):
150150
if not isinstance(q, basestring):
151151
raise TypeError("function return value ({0}) must be a string".format(q))
152152

153-
if q not in self.pairs:
154-
self.pairs[q] = self.value.zero()
155-
self.pairs[q].fill(datum, weight)
153+
if q not in self.bins:
154+
self.bins[q] = self.value.zero()
155+
self.bins[q].fill(datum, weight)
156156

157157
# no possibility of exception from here on out (for rollback)
158158
self.entries += weight
@@ -161,29 +161,29 @@ def _cppGenerateCode(self, parser, generator, inputFieldNames, inputFieldTypes,
161161
normexpr = self._c99QuantityExpr(parser, generator, inputFieldNames, inputFieldTypes, derivedFieldTypes, derivedFieldExprs, None)
162162

163163
initCode.append(" " * initIndent + self._c99ExpandPrefix(*initPrefix) + ".entries = 0.0;")
164-
initCode.append(" " * initIndent + self._c99ExpandPrefix(*initPrefix) + ".pairs.clear();")
164+
initCode.append(" " * initIndent + self._c99ExpandPrefix(*initPrefix) + ".bins.clear();")
165165
fillCode.append(" " * fillIndent + self._c99ExpandPrefix(*fillPrefix) + ".entries += " + weightVarStack[-1] + ";")
166166

167167
value = "value_" + str(len(tmpVarTypes))
168168
tmpVarTypes[value] = self.value._c99StorageType() + "*"
169169

170-
fillCode.append("""{indent}if ({pairs}.find({q}) == {pairs}.end())
171-
{indent} {pairs}[{q}] = {prototype}; // copy
172-
{indent}{value} = &({pairs}[{q}]); // reference""".format(
170+
fillCode.append("""{indent}if ({bins}.find({q}) == {bins}.end())
171+
{indent} {bins}[{q}] = {prototype}; // copy
172+
{indent}{value} = &({bins}[{q}]); // reference""".format(
173173
indent = " " * fillIndent,
174174
q = normexpr,
175175
value = value,
176176
prototype = self._c99ExpandPrefix(*fillPrefix) + ".value",
177-
pairs = self._c99ExpandPrefix(*fillPrefix) + ".pairs"))
177+
bins = self._c99ExpandPrefix(*fillPrefix) + ".bins"))
178178

179179
self.value._c99GenerateCode(parser, generator, inputFieldNames, inputFieldTypes, derivedFieldTypes, derivedFieldExprs, storageStructs, initCode, initPrefix + (("var", "value"),), initIndent, fillCode, (("var", "(*" + value + ")"),), fillIndent, weightVars, weightVarStack, tmpVarTypes)
180180

181181
storageStructs[self._c99StructName()] = """
182182
typedef struct {{
183183
double entries;
184184
{1} value;
185-
std::unordered_map<std::string, {1}> pairs;
186-
{1}& getValues(std::string i) {{ return pairs[i]; }}
185+
std::unordered_map<std::string, {1}> bins;
186+
{1}& getValues(std::string i) {{ return bins[i]; }}
187187
}} {0};
188188
""".format(self._c99StructName(), self.value._c99StorageType())
189189

@@ -194,11 +194,11 @@ def _clingUpdate(self, filler, *extractorPrefix):
194194
obj = self._clingExpandPrefix(filler, *extractorPrefix)
195195
self.entries += obj.entries
196196

197-
for i in obj.pairs:
197+
for i in obj.bins:
198198
key = i.first
199-
if key not in self.pairs:
200-
self.pairs[key] = self.value.copy()
201-
self.pairs[key]._clingUpdate(obj, ("func", ["getValues", key]))
199+
if key not in self.bins:
200+
self.bins[key] = self.value.copy()
201+
self.bins[key]._clingUpdate(obj, ("func", ["getValues", key]))
202202

203203
def _c99StructName(self):
204204
return "Cz" + self.value._c99StructName()
@@ -212,17 +212,17 @@ def _numpy(self, data, weights, shape):
212212
# no possibility of exception from here on out (for rollback)
213213
for x, w in zip(q, weights):
214214
if w > 0.0:
215-
if x not in self.pairs:
216-
self.pairs[x] = self.value.zero()
217-
self.pairs[x].fill(x, w)
215+
if x not in self.bins:
216+
self.bins[x] = self.value.zero()
217+
self.bins[x].fill(x, w)
218218

219219
# no possibility of exception from here on out (for rollback)
220220
self.entries += float(weights.sum())
221221

222222
@property
223223
def children(self):
224224
"""List of sub-aggregators, to make it possible to walk the tree."""
225-
return [self.value] + list(self.pairs.values())
225+
return [self.value] + list(self.bins.values())
226226

227227
@inheritdoc(Container)
228228
def toJsonFragment(self, suppressName):
@@ -233,11 +233,11 @@ def toJsonFragment(self, suppressName):
233233
binsName = self.value.quantityName
234234
else:
235235
binsName = None
236-
elif len(self.pairs) > 0:
237-
if getattr(list(self.pairs.values())[0], "quantity", None) is not None:
238-
binsName = list(self.pairs.values())[0].quantity.name
239-
elif getattr(list(self.pairs.values())[0], "quantityName", None) is not None:
240-
binsName = list(self.pairs.values())[0].quantityName
236+
elif len(self.bins) > 0:
237+
if getattr(list(self.bins.values())[0], "quantity", None) is not None:
238+
binsName = list(self.bins.values())[0].quantity.name
239+
elif getattr(list(self.bins.values())[0], "quantityName", None) is not None:
240+
binsName = list(self.bins.values())[0].quantityName
241241
else:
242242
binsName = None
243243
else:
@@ -246,7 +246,7 @@ def toJsonFragment(self, suppressName):
246246
return maybeAdd({
247247
"entries": floatToJson(self.entries),
248248
"bins:type": self.value.name if self.value is not None else self.contentType,
249-
"bins": dict((k, v.toJsonFragment(True)) for k, v in self.pairs.items()),
249+
"bins": dict((k, v.toJsonFragment(True)) for k, v in self.bins.items()),
250250
}, **{"name": None if suppressName else self.quantity.name,
251251
"bins:name": binsName})
252252

@@ -280,11 +280,11 @@ def fromJsonFragment(json, nameFromParent):
280280
raise JsonFormatException(json["bins:name"], "Categorize.bins:name")
281281

282282
if isinstance(json["bins"], dict):
283-
pairs = dict((k, factory.fromJsonFragment(v, dataName)) for k, v in json["bins"].items())
283+
bins = dict((k, factory.fromJsonFragment(v, dataName)) for k, v in json["bins"].items())
284284
else:
285285
raise JsonFormatException(json, "Categorize.bins")
286286

287-
out = Categorize.ed(entries, contentType, **pairs)
287+
out = Categorize.ed(entries, contentType, **bins)
288288
out.quantity.name = nameFromParent if name is None else name
289289
return out.specialize()
290290

@@ -295,11 +295,11 @@ def __repr__(self):
295295
return "<Categorize values={0} size={1}".format(self.values[0].name if self.size > 0 else self.value.name if self.value is not None else self.contentType, self.size)
296296

297297
def __eq__(self, other):
298-
return isinstance(other, Categorize) and numeq(self.entries, other.entries) and self.quantity == other.quantity and self.pairs == other.pairs
298+
return isinstance(other, Categorize) and numeq(self.entries, other.entries) and self.quantity == other.quantity and self.bins == other.bins
299299

300300
def __ne__(self, other): return not self == other
301301

302302
def __hash__(self):
303-
return hash((self.entries, self.quantity, tuple(sorted(self.pairs.items()))))
303+
return hash((self.entries, self.quantity, tuple(sorted(self.bins.items()))))
304304

305305
Factory.register(Categorize)

test/specification.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -405,25 +405,25 @@ def IrregularlyBin_combine(one, two):
405405
def Categorize_fill(categorizing, datum, weight):
406406
if weight > 0.0:
407407
q = categorizing.quantity(datum)
408-
if q not in categorizing.pairs:
409-
categorizing.pairs[q] = categorizing.value.copy()
410-
fill(categorizing.pairs[q], datum, weight)
408+
if q not in categorizing.bins:
409+
categorizing.bins[q] = categorizing.value.copy()
410+
fill(categorizing.bins[q], datum, weight)
411411
categorizing.entries += weight
412412

413413
def Categorize_combine(one, two):
414414
if one.contentType != two.contentType:
415415
raise Exception
416416
entries = one.entries + two.entries
417417
contentType = one.contentType
418-
pairs = {}
419-
for key in set(one.pairs.keys()).union(set(two.pairs.keys())):
420-
if key in one.pairs and key in two.pairs:
421-
pairs[key] = combine(one.pairs[key], two.pairs[key])
422-
elif key in one.pairs:
423-
pairs[key] = one.pairs[key].copy()
424-
elif key in two.pairs:
425-
pairs[key] = two.pairs[key].copy()
426-
return Categorize.ed(entries, contentType, pairs)
418+
bins = {}
419+
for key in set(one.bins.keys()).union(set(two.bins.keys())):
420+
if key in one.bins and key in two.bins:
421+
bins[key] = combine(one.bins[key], two.bins[key])
422+
elif key in one.bins:
423+
bins[key] = one.bins[key].copy()
424+
elif key in two.bins:
425+
bins[key] = two.bins[key].copy()
426+
return Categorize.ed(entries, contentType, bins)
427427

428428
def Fraction_fill(fractioning, datum, weight):
429429
if weight > 0.0:

test/testbasic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -803,7 +803,7 @@ def testCategorize(self):
803803
categorizing = Categorize(named("something", lambda x: x.string[0]))
804804
for _ in self.struct: categorizing.fill(_)
805805

806-
self.assertEqual(dict((k, v.entries) for k, v in categorizing.pairsMap.items()), {"n": 1.0, "e": 1.0, "t": 3.0, "s": 2.0, "f": 2.0, "o": 1.0})
806+
self.assertEqual(dict((k, v.entries) for k, v in categorizing.binsMap.items()), {"n": 1.0, "e": 1.0, "t": 3.0, "s": 2.0, "f": 2.0, "o": 1.0})
807807

808808
self.checkJson(categorizing)
809809
self.checkPickle(categorizing)

0 commit comments

Comments
 (0)