From 3dcfe65eb5fd287c5282a1785c175530c76ddb2e Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 15 Feb 2017 19:59:57 +0200 Subject: [PATCH 1/4] Experiment with flattening graphs for textual edit distance based match Signed-off-by: Nir Izraeli --- idaplugin/rematch/collectors/flat_graph.py | 60 ++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 idaplugin/rematch/collectors/flat_graph.py diff --git a/idaplugin/rematch/collectors/flat_graph.py b/idaplugin/rematch/collectors/flat_graph.py new file mode 100644 index 000000000..72579230d --- /dev/null +++ b/idaplugin/rematch/collectors/flat_graph.py @@ -0,0 +1,60 @@ +import idaapi + +from .vector import Vector + + +class FlatGraphVector(Vector): + type = 'flatgraph_editdistance' + type_version = 0 + + def __init__(self, *args, **kwargs): + super(FlatGraphVector, self).__init__(*args, **kwargs) + self.flow_chart = idaapi.FlowChart(idaapi.get_func(self.offset)) + self.visited = set() + self.items = list[self.flow_chart.size] + + def _bb_size(self, bb): + if bb.endEA > bb.startEA: + return bb.endEA - bb.startEA + + raise ValueError("while flattening graph, a basicblock that ends before " + "it starts encountered at {:x}".format(self.offset)) + + def _bb_value(self, bb): + # TODO: this should be something that's uncorellated with the order of + # basic blocks + return self._bb_size(bb) + + def _append_bbs(self, *bbs): + self.items.extend(map(self._bb_value, bbs)) + + def _find_head(self): + def is_head(bb): + return len(bb.preds()) == 0 + + heads = filter(is_head, self.flow_chart) + if len(heads) == 1: + return heads[0] + + msg = ("flattening graphs with head count other than 1 is not supported, " + "got {} head-count for {:x}".format(len(heads), self.offset)) + raise ValueError(msg) + + def _sort_siblings(self, siblings): + return sorted(siblings, key=self._bb_size) + + def _recurse_bb(self, bb): + if bb in self.visited: + return [] + + self.visited.add(bb) + siblings = 
self._sort_siblings(bb.succs()) + self._append_bbs(*siblings) + + for sibling in siblings: + self._recurse_siblings(sibling) + + def _data(self): + head = self._find_head() + self._recurse_bb(head) + return self.items From 4b784bd53ad3140f93e70d3a92f21ef8e6372c2e Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 18 Jun 2017 15:46:24 -0400 Subject: [PATCH 2/4] Connect new flat graph vector to plugin Signed-off-by: Nir Izraeli --- idaplugin/rematch/collectors/vectors/__init__.py | 3 ++- idaplugin/rematch/instances/function.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/collectors/vectors/__init__.py b/idaplugin/rematch/collectors/vectors/__init__.py index 767069c9f..61b390aff 100644 --- a/idaplugin/rematch/collectors/vectors/__init__.py +++ b/idaplugin/rematch/collectors/vectors/__init__.py @@ -5,8 +5,9 @@ from .assembly_hash import AssemblyHashVector from .mnemonic_hash import MnemonicHashVector from .mnemonic_hist import MnemonicHistVector +from .flat_graph import FlatGraphVector __all__ = ["Vector", "InstructionHashVector", "IdentityHashVector", "NameHashVector", "AssemblyHashVector", "MnemonicHashVector", - "MnemonicHistVector"] + "MnemonicHistVector", "FlatGraphVector"] diff --git a/idaplugin/rematch/instances/function.py b/idaplugin/rematch/instances/function.py index 5116b4487..290c1888b 100755 --- a/idaplugin/rematch/instances/function.py +++ b/idaplugin/rematch/instances/function.py @@ -21,7 +21,8 @@ def __init__(self, *args, **kwargs): collectors.vectors.IdentityHashVector, collectors.vectors.AssemblyHashVector, collectors.vectors.MnemonicHashVector, - collectors.vectors.MnemonicHistVector} + collectors.vectors.MnemonicHistVector, + collectors.vectors.FlatGraphVector} self.annotations |= {collectors.annotations.AssemblyAnnotation} def size(self): From 59166819aaffe605837f74089a4e7dbe95a55181 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 18 Jun 2017 16:01:45 -0400 Subject: [PATCH 3/4] fix flat graph 
after rebase Signed-off-by: Nir Izraeli --- .../rematch/collectors/{ => vectors}/flat_graph.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) rename idaplugin/rematch/collectors/{ => vectors}/flat_graph.py (82%) diff --git a/idaplugin/rematch/collectors/flat_graph.py b/idaplugin/rematch/collectors/vectors/flat_graph.py similarity index 82% rename from idaplugin/rematch/collectors/flat_graph.py rename to idaplugin/rematch/collectors/vectors/flat_graph.py index 72579230d..ba48d9a6f 100644 --- a/idaplugin/rematch/collectors/flat_graph.py +++ b/idaplugin/rematch/collectors/vectors/flat_graph.py @@ -9,9 +9,9 @@ class FlatGraphVector(Vector): def __init__(self, *args, **kwargs): super(FlatGraphVector, self).__init__(*args, **kwargs) - self.flow_chart = idaapi.FlowChart(idaapi.get_func(self.offset)) + self.flow_chart = None self.visited = set() - self.items = list[self.flow_chart.size] + self.items = list() def _bb_size(self, bb): if bb.endEA > bb.startEA: @@ -22,7 +22,8 @@ def _bb_size(self, bb): def _bb_value(self, bb): # TODO: this should be something that's uncorellated with the order of - # basic blocks + # basic blocks and describes basic blocks well + # Some kind of hash for mnemonics could be used return self._bb_size(bb) def _append_bbs(self, *bbs): @@ -45,7 +46,7 @@ def _sort_siblings(self, siblings): def _recurse_bb(self, bb): if bb in self.visited: - return [] + return self.visited.add(bb) siblings = self._sort_siblings(bb.succs()) @@ -54,7 +55,9 @@ def _recurse_bb(self, bb): for sibling in siblings: self._recurse_siblings(sibling) - def _data(self): + def data(self, offset): + self.flow_chart = idaapi.FlowChart(idaapi.get_func(offset)) + self.items.append(self.flow_chart.size) head = self._find_head() self._recurse_bb(head) return self.items From ba071d5719df83d4fb9cb735488fecb0c2d9da59 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 19 Jun 2017 13:39:22 -0400 Subject: [PATCH 4/4] Rename class and file Signed-off-by: Nir Izraeli --- 
idaplugin/rematch/collectors/vectors/__init__.py | 4 ++-- .../vectors/{flat_graph.py => flatgraph_editdistance.py} | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename idaplugin/rematch/collectors/vectors/{flat_graph.py => flatgraph_editdistance.py} (93%) diff --git a/idaplugin/rematch/collectors/vectors/__init__.py b/idaplugin/rematch/collectors/vectors/__init__.py index 61b390aff..f64687c24 100644 --- a/idaplugin/rematch/collectors/vectors/__init__.py +++ b/idaplugin/rematch/collectors/vectors/__init__.py @@ -5,9 +5,9 @@ from .assembly_hash import AssemblyHashVector from .mnemonic_hash import MnemonicHashVector from .mnemonic_hist import MnemonicHistVector -from .flat_graph import FlatGraphVector +from .flatgraph_editdistance import FlatGraphEditDistanceVector __all__ = ["Vector", "InstructionHashVector", "IdentityHashVector", "NameHashVector", "AssemblyHashVector", "MnemonicHashVector", - "MnemonicHistVector", "FlatGraphVector"] + "MnemonicHistVector", "FlatGraphEditDistanceVector"] diff --git a/idaplugin/rematch/collectors/vectors/flat_graph.py b/idaplugin/rematch/collectors/vectors/flatgraph_editdistance.py similarity index 93% rename from idaplugin/rematch/collectors/vectors/flat_graph.py rename to idaplugin/rematch/collectors/vectors/flatgraph_editdistance.py index ba48d9a6f..444d35265 100644 --- a/idaplugin/rematch/collectors/vectors/flat_graph.py +++ b/idaplugin/rematch/collectors/vectors/flatgraph_editdistance.py @@ -3,12 +3,12 @@ from .vector import Vector -class FlatGraphVector(Vector): +class FlatGraphEditDistanceVector(Vector): type = 'flatgraph_editdistance' type_version = 0 def __init__(self, *args, **kwargs): - super(FlatGraphVector, self).__init__(*args, **kwargs) + super(FlatGraphEditDistanceVector, self).__init__(*args, **kwargs) self.flow_chart = None self.visited = set() self.items = list()