From eb2fe38acf3670d53a66ae1efccb1d0ee15e32d9 Mon Sep 17 00:00:00 2001
From: Kristopher Borer <kris@key.me>
Date: Wed, 17 May 2017 20:29:10 -0400
Subject: [PATCH 1/2] add case for tab character in vocabulary

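The vocabulary file is tab-separated, so a special case is needed when the
tab character itself is a vocabulary entry, as in character-level models.
Such a line starts with two tabs and would otherwise split into three
fields instead of a token and a count.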
---
 seq2seq/data/vocab.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/seq2seq/data/vocab.py b/seq2seq/data/vocab.py
index e4d672ec..06546cd2 100644
--- a/seq2seq/data/vocab.py
+++ b/seq2seq/data/vocab.py
@@ -84,7 +84,10 @@ def create_vocabulary_lookup_table(filename, default_value=None):
 
   has_counts = len(vocab[0].split("\t")) == 2
   if has_counts:
-    vocab, counts = zip(*[_.split("\t") for _ in vocab])
+    pairs = [["\t", line.split("\t")[-1]]
+              if line.startswith("\t\t") else line.split("\t")
+              for line in vocab]
+    vocab, counts = zip(*pairs)
     counts = [float(_) for _ in counts]
     vocab = list(vocab)
   else:

From e37ea7a71cd7e15072bdd1d240f9d99f52e4fa63 Mon Sep 17 00:00:00 2001
From: Kristopher Borer <kris@key.me>
Date: Thu, 18 May 2017 08:37:53 -0400
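Subject: [PATCH 2/2] fix failing test in hooks_test

Compare the contents of model_analysis.txt directly instead of calling
.decode() on them first.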

---
 seq2seq/test/hooks_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/seq2seq/test/hooks_test.py b/seq2seq/test/hooks_test.py
index dedc6594..21fae653 100644
--- a/seq2seq/test/hooks_test.py
+++ b/seq2seq/test/hooks_test.py
@@ -47,7 +47,7 @@ def test_begin(self):
     with gfile.GFile(os.path.join(model_dir, "model_analysis.txt")) as file:
       file_contents = file.read().strip()
 
-    self.assertEqual(file_contents.decode(), "_TFProfRoot (--/16.38k params)\n"
+    self.assertEqual(file_contents, "_TFProfRoot (--/16.38k params)\n"
                      "  weigths (128x128, 16.38k/16.38k params)")
     outfile.close()