From 38289f25ad201775e5135367886844f391110f04 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 4 Nov 2020 23:05:11 -0600
Subject: [PATCH 01/10] Making http-response more robust to demonstrate
 real-world usage

---
 http-response/README.md     |  13 ++-
 http-response/background.js | 157 +++++++++++++++++++++++++++++++++---
 http-response/manifest.json |   2 +-
 3 files changed, 156 insertions(+), 16 deletions(-)

diff --git a/http-response/README.md b/http-response/README.md
index 40c23380..25d3f453 100755
--- a/http-response/README.md
+++ b/http-response/README.md
@@ -2,10 +2,19 @@
 
 ## What it does
 
-Listens to HTTP Responses from example.com and changes the body of the response as it comes through. So that the word "Example" on https://example.com becomes "WebExtension Example".
+Listens to HTTP Responses from example.com and w3.org and changes "Example" to "WebExtension Example" and
+"Test" to "WebExtension Test" in the web pages contents. 
 
 ## What it shows
 
-How to use the response parser on bytes.
+A real-world example of WebRequest that shows three important details not always found in beginning examples:
+ - The accumulation of data through multiple calls to .ondata
+ - The decoding of binary data to text in a streaming fashion.
+ - Text decoding that tries to respect the page's reported encoding via Content-Type.
+
+The domain w3.org is included in the list of domains to allow for testing against [this suite of standardized tests](https://www.w3.org/2006/11/mwbp-tests/index.xhtml)
+regarding text encoding. Tests #1-8 pass, test #9 currently fails.
+
+## Credits
 
 Icon is from: https://www.iconfinder.com/icons/763339/draw_edit_editor_pen_pencil_tool_write_icon#size=128
diff --git a/http-response/background.js b/http-response/background.js
index b7a78800..2693e02b 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -1,22 +1,153 @@
 function listener(details) {
+  // The received data is a stream of bytes. In order to do text-based
+  // modifications, it is necessary to decode the bytes into a string
+  // using the proper character encoding, do any modifications, then
+  // encode back into a stream of bytes.
+  // Historically, detecting character encoding has been a tricky task
+  // taken on by the browser. Here, a simplified approach is taken
+  // and the complexity is hidden in a helper method.
+  let decoder, encoder;
+  [decoder, encoder] = detectCharsetAndSetupDecoderEncoder(details);
   let filter = browser.webRequest.filterResponseData(details.requestId);
-  let decoder = new TextDecoder("utf-8");
-  let encoder = new TextEncoder();
+  let fullStr = '';
+  
+  filter.ondata = e => {
+    // Note that the event's data may break in the middle of an encoded
+    // character - the stream parameter is critical for success as this
+    // method gets called multiple times.
+    let str = decoder.decode(e.data, {stream: true});
+    fullStr += str;
+  }
+  
+  filter.onstop = async e => {
+    // Just change any instance of Example or Test in the HTTP response
+    // to WebExtension Example or WebExtension Test.
+    let mutatedStr = fullStr.replace(/Example/g, 'WebExtension Example');
+    mutatedStr = mutatedStr.replace(/Test/g, 'WebExtension Test');
+    filter.write(encoder.encode(mutatedStr));
+    filter.close();
+  }
 
-  filter.ondata = event => {
-    let str = decoder.decode(event.data, {stream: true});
-    // Just change any instance of Example in the HTTP response
-    // to WebExtension Example.
-    str = str.replace(/Example/g, 'WebExtension Example');
-    filter.write(encoder.encode(str));
-    filter.disconnect();
+  filter.onerror = e => {
+    try {
+        filter.close();
+        console.log('Filter error: '+e+', '+ex);
+    } catch(ex) {
+        console.log('Filter error while closing: '+e+', '+ex);
+    }
   }
 
-  return {};
+  // Because details response headers have been mutated, return it
+  return details;
 }
 
-browser.webRequest.onBeforeRequest.addListener(
+browser.webRequest.onHeadersReceived.addListener(
   listener,
-  {urls: ["https://example.com/*"], types: ["main_frame"]},
-  ["blocking"]
+  {
+    urls: ["https://example.com/*", "https://www.w3.org/*"], // Include W3 for testing charset detection.
+    types: ["main_frame"]
+  },
+  ["blocking","responseHeaders"]
 );
+
+// This helper method does a few things regarding character encoding:
+// 1) Detects the charset for the TextDecoder so that bytes are properly turned into strings
+// 2) Ensures the output Content-Type is UTF-8 because that is what TextEncoder supports
+// 3) Returns the decoder/encoder pair
+function detectCharsetAndSetupDecoderEncoder(details) {
+  let contentType = '';
+  let headerIndex = -1;
+  for(let i=0; i<details.responseHeaders.length; i++) {
+      let header = details.responseHeaders[i];
+      if(header.name.toLowerCase() == "content-type") {
+          contentType = header.value.toLowerCase();
+          headerIndex = i;
+          break;
+      }
+  }
+  if (headerIndex == -1) {
+    console.log('No Content-Type header detected for '+details.url+', adding one.');
+    headerIndex = details.responseHeaders.length;
+    contentType = 'text/html';
+    details.responseHeaders.push(
+      {
+        "name": "Content-Type",
+        "value":"text/html"
+      }
+    );
+  }
+
+  let baseType;
+  if(contentType.trim().startsWith('text/html')) {
+    baseType = 'text/html';
+    console.log('Detected base type was '+baseType);
+  } else if(contentType.trim().startsWith('application/xhtml+xml')) {
+    baseType = 'application/xhtml+xml';
+    console.log('Detected base type was '+baseType);
+  } else {
+    baseType = 'text/html';
+    console.log('The Content-Type was '+contentType+', not text/html or application/xhtml+xml - results might be strange.');
+  }
+
+  // It is important to detect the charset to correctly initialize TextDecoder or
+  // else we run into garbage output sometimes.
+  // However, TextEncoder does NOT support other than 'utf-8', so it is necessary
+  // to change the Content-Type on the header to UTF-8
+  // If modifying this block of code, ensure that the tests at
+  // https://www.w3.org/2006/11/mwbp-tests/index.xhtml
+  // all pass - current implementation only fails on #9 but this detection ensures
+  // tests #3,4,5, and 8 pass.
+  let decodingCharset = 'utf-8';
+  let detectedCharset = detectCharset(contentType);
+
+  if(detectedCharset !== undefined) {
+      decodingCharset = detectedCharset;
+      console.log('Detected charset was ' + decodingCharset + ' for ' + details.url);
+  }
+  details.responseHeaders[headerIndex].value = baseType+';charset=utf-8';
+
+  let decoder = new TextDecoder(decodingCharset);
+  let encoder = new TextEncoder(); //Encoder does not support non-UTF-8 charsets so this is always utf-8.
+
+  return [decoder,encoder];
+}
+
+// Detect the charset from Content-Type
+function detectCharset(contentType) {
+  /*
+  From https://tools.ietf.org/html/rfc7231#section-3.1.1.5:
+
+  A parameter value that matches the token production can be
+  transmitted either as a token or within a quoted-string.  The quoted
+  and unquoted values are equivalent.  For example, the following
+  examples are all equivalent, but the first is preferred for
+  consistency:
+
+  text/html;charset=utf-8
+  text/html;charset=UTF-8
+  Text/HTML;Charset="utf-8"
+  text/html; charset="utf-8"
+
+  Internet media types ought to be registered with IANA according to
+  the procedures defined in [BCP13].
+
+  Note: Unlike some similar constructs in other header fields, media
+  type parameters do not allow whitespace (even "bad" whitespace)
+  around the "=" character.
+
+  ...
+
+  And regarding application/xhtml+xml, from https://tools.ietf.org/html/rfc3236#section-2
+  and the referenced links, it can be seen that charset is handled the same way with
+  respect to Content-Type.
+  */
+
+  let charsetMarker = "charset="; // Spaces *shouldn't* matter
+  let foundIndex = contentType.indexOf(charsetMarker);
+  if (foundIndex == -1) {
+      return undefined;
+  }
+  let charsetMaybeQuoted = contentType.substr(foundIndex+charsetMarker.length).trim();
+  let charset = charsetMaybeQuoted.replace(/\"/g, '');
+  return charset;
+}
\ No newline at end of file
diff --git a/http-response/manifest.json b/http-response/manifest.json
index 3e3d75cd..bac4038b 100755
--- a/http-response/manifest.json
+++ b/http-response/manifest.json
@@ -10,7 +10,7 @@
   },
 
   "permissions": [
-    "webRequest", "webRequestBlocking", "https://example.com/*"
+    "webRequest", "webRequestBlocking", "https://example.com/*", "https://www.w3.org/*"
   ],
 
   "background": {

From da9856ef900d68477aab8553096b972bfd645ec1 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 4 Nov 2020 23:11:07 -0600
Subject: [PATCH 02/10] Small typos

---
 http-response/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/http-response/README.md b/http-response/README.md
index 25d3f453..f5b040ca 100755
--- a/http-response/README.md
+++ b/http-response/README.md
@@ -3,12 +3,12 @@
 ## What it does
 
 Listens to HTTP Responses from example.com and w3.org and changes "Example" to "WebExtension Example" and
-"Test" to "WebExtension Test" in the web pages contents. 
+"Test" to "WebExtension Test" in the web pages' contents. 
 
 ## What it shows
 
 A real-world example of WebRequest that shows three important details not always found in beginning examples:
- - The accumulation of data through multiple calls to .ondata
+ - The accumulation of data through multiple calls to `.ondata`
  - The decoding of binary data to text in a streaming fashion.
  - Text decoding that tries to respect the page's reported encoding via Content-Type.
 

From 334cb109a5788963b1d2c8ebd01f594fa9a93872 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 4 Nov 2020 23:40:34 -0600
Subject: [PATCH 03/10] Ensure TextDecoder flushes at the end

---
 http-response/background.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/http-response/background.js b/http-response/background.js
index 2693e02b..ecf29396 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -20,6 +20,7 @@ function listener(details) {
   }
   
   filter.onstop = async e => {
+    fullStr += decoder.decode(); //Flush the buffer
     // Just change any instance of Example or Test in the HTTP response
     // to WebExtension Example or WebExtension Test.
     let mutatedStr = fullStr.replace(/Example/g, 'WebExtension Example');

From 8f5f7001b982668978c9d04388c9159b02c8a5d7 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Thu, 5 Nov 2020 08:34:50 -0600
Subject: [PATCH 04/10] Travis is complaining about the escaping of quote being
 useless

---
 http-response/background.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/http-response/background.js b/http-response/background.js
index ecf29396..96c0fa1a 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -149,6 +149,6 @@ function detectCharset(contentType) {
       return undefined;
   }
   let charsetMaybeQuoted = contentType.substr(foundIndex+charsetMarker.length).trim();
-  let charset = charsetMaybeQuoted.replace(/\"/g, '');
+  let charset = charsetMaybeQuoted.replace(/"/g, '');
   return charset;
 }
\ No newline at end of file

From dea141bf2e34ea50119ac00dae3ad28eecb4f5f4 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 11 Nov 2020 21:08:31 -0600
Subject: [PATCH 05/10] Removing Example/example.com

---
 http-response/README.md     | 3 +--
 http-response/background.js | 8 +++-----
 http-response/manifest.json | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/http-response/README.md b/http-response/README.md
index f5b040ca..c8098932 100755
--- a/http-response/README.md
+++ b/http-response/README.md
@@ -2,8 +2,7 @@
 
 ## What it does
 
-Listens to HTTP Responses from example.com and w3.org and changes "Example" to "WebExtension Example" and
-"Test" to "WebExtension Test" in the web pages' contents. 
+Listens to HTTP Responses from example.com and w3.org and changes "Test" to "WebExtension Test" in the web pages' contents. 
 
 ## What it shows
 
diff --git a/http-response/background.js b/http-response/background.js
index 96c0fa1a..193a7cc8 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -21,10 +21,8 @@ function listener(details) {
   
   filter.onstop = async e => {
     fullStr += decoder.decode(); //Flush the buffer
-    // Just change any instance of Example or Test in the HTTP response
-    // to WebExtension Example or WebExtension Test.
-    let mutatedStr = fullStr.replace(/Example/g, 'WebExtension Example');
-    mutatedStr = mutatedStr.replace(/Test/g, 'WebExtension Test');
+    // Just change any instance of Test in the HTTP response to WebExtension Test.
+    let mutatedStr = mutatedStr.replace(/Test/g, 'WebExtension Test');
     filter.write(encoder.encode(mutatedStr));
     filter.close();
   }
@@ -45,7 +43,7 @@ function listener(details) {
 browser.webRequest.onHeadersReceived.addListener(
   listener,
   {
-    urls: ["https://example.com/*", "https://www.w3.org/*"], // Include W3 for testing charset detection.
+    urls: ["https://www.w3.org/*"], // Include W3 for testing charset detection.
     types: ["main_frame"]
   },
   ["blocking","responseHeaders"]
diff --git a/http-response/manifest.json b/http-response/manifest.json
index bac4038b..4bc6ba5e 100755
--- a/http-response/manifest.json
+++ b/http-response/manifest.json
@@ -10,7 +10,7 @@
   },
 
   "permissions": [
-    "webRequest", "webRequestBlocking", "https://example.com/*", "https://www.w3.org/*"
+    "webRequest", "webRequestBlocking", "https://www.w3.org/*"
   ],
 
   "background": {

From 74be63d97d9d51a0b73bf20fcafdbb8e0ac41369 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 11 Nov 2020 21:10:19 -0600
Subject: [PATCH 06/10] Removing unhelpful error handling

---
 http-response/background.js | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/http-response/background.js b/http-response/background.js
index 193a7cc8..4e4b21a7 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -27,15 +27,6 @@ function listener(details) {
     filter.close();
   }
 
-  filter.onerror = e => {
-    try {
-        filter.close();
-        console.log('Filter error: '+e+', '+ex);
-    } catch(ex) {
-        console.log('Filter error while closing: '+e+', '+ex);
-    }
-  }
-
   // Because details response headers have been mutated, return it
   return details;
 }

From 5b58552b61f47970ed7adabafc0274224380941b Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Wed, 11 Nov 2020 22:34:53 -0600
Subject: [PATCH 07/10] HTTP response code check and fixing mutatedStr
 regression

---
 http-response/background.js | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/http-response/background.js b/http-response/background.js
index 4e4b21a7..b7367bc3 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -1,4 +1,8 @@
 function listener(details) {
+  // If the HTTP response code is not OK, just let it flow through normally.
+  if (details.statusCode < 200 || 300 <= details.statusCode) {
+    return details;
+  }
   // The received data is a stream of bytes. In order to do text-based
   // modifications, it is necessary to decode the bytes into a string
   // using the proper character encoding, do any modifications, then
@@ -22,7 +26,7 @@ function listener(details) {
   filter.onstop = async e => {
     fullStr += decoder.decode(); //Flush the buffer
     // Just change any instance of Test in the HTTP response to WebExtension Test.
-    let mutatedStr = mutatedStr.replace(/Test/g, 'WebExtension Test');
+    let mutatedStr = fullStr.replace(/Test/g, 'WebExtension Test');
     filter.write(encoder.encode(mutatedStr));
     filter.close();
   }

From 6e374f5e34b7a753e2a6fe061cc78d49a132eb44 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Thu, 12 Nov 2020 19:48:47 -0600
Subject: [PATCH 08/10] Trying to clean up charset detection code

---
 http-response/background.js | 178 ++++++++++++++----------------------
 1 file changed, 68 insertions(+), 110 deletions(-)

diff --git a/http-response/background.js b/http-response/background.js
index b7367bc3..e2b1d4e4 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -1,17 +1,64 @@
 function listener(details) {
   // If the HTTP response code is not OK, just let it flow through normally.
   if (details.statusCode < 200 || 300 <= details.statusCode) {
-    return details;
+    console.log('HTTP Status Code was '+details.statusCode+' not 2XX for '+details.url+', skipping filtering.');
+    return;
   }
+
   // The received data is a stream of bytes. In order to do text-based
   // modifications, it is necessary to decode the bytes into a string
   // using the proper character encoding, do any modifications, then
   // encode back into a stream of bytes.
-  // Historically, detecting character encoding has been a tricky task
-  // taken on by the browser. Here, a simplified approach is taken
-  // and the complexity is hidden in a helper method.
-  let decoder, encoder;
-  [decoder, encoder] = detectCharsetAndSetupDecoderEncoder(details);
+  //
+  // In order to use the correct decoding, one needs to detect the charset.
+  // Please note that there are many complex rules to detect the charset,
+  // and no approach with scanning only the response headers will be
+  // fully accurate. The simplified approach here is to find the
+  // Content-Type and extract the charset if found.
+
+  let {responseHeaders} = details;
+
+  // Find the last Content-Type header.
+  let contentTypeHeader = responseHeaders
+        .slice().reverse()
+        .find(h => h.name.toLowerCase() == "content-type");
+
+  // If Content-Type header is not set, the browser is going to do content-sniffing,
+  // and we should also return to avoid trouble (e.g. breaking downloads, PDFs, videos, ...).
+  if (contentTypeHeader === undefined) {
+    console.log('Content-Type header not found for '+details.url+', skipping filtering');
+    return;
+  }
+
+  // If it is not a supported content type, we will return rather than guess.
+  let baseType;
+  let contentType = contentTypeHeader.value.trim();
+  if(contentType.startsWith('text/html')) {
+    baseType = 'text/html';
+  } else if (contentType.startsWith('application/xhtml+xml')) {
+    baseType = 'application/xhtml+xml';
+  } else {
+    console.log('Content type '+contentType+' not supported for '+details.url+', skipping filtering.');
+    return;
+  }
+
+  // Set up TextDecoder
+  console.log('Initial checks passed, beginning charset detection for '+details.url);
+  let charset = detectCharset(contentType) || 'utf-8';
+  let decoder = new TextDecoder(charset);
+  console.log('The detected charset was '+charset+' for '+details.url);
+
+  // While TextDecoder supports most charset encodings, TextEncoder does NOT support
+  // other than 'utf-8', so it is necessary to change the Content-Type on the header
+  // to UTF-8. If modifying this block of code, ensure that the tests at
+  // https://www.w3.org/2006/11/mwbp-tests/index.xhtml
+  // pass - current implementation only fails on #9 but this detection ensures
+  // tests #3,4,5, and 8 pass.
+  let encoder = new TextEncoder(); 
+  contentTypeHeader.value = baseType+';charset=utf-8';
+
+  
+  // Now the actual filtering can begin!
   let filter = browser.webRequest.filterResponseData(details.requestId);
   let fullStr = '';
   
@@ -31,10 +78,23 @@ function listener(details) {
     filter.close();
   }
 
-  // Because details response headers have been mutated, return it
+  // Because details response headers have been mutated, return them
   return details;
 }
 
+// Extracts the charset parameter from a Content-Type header value.
+// Returns the last charset label given (lower-cased), or undefined if none.
+function detectCharset(contentType) {
+  // Parameter names are case-insensitive and other parameters may follow, so
+  // match ignoring case and stop at ';', '"' or whitespace; greedy '.*' picks the last.
+  let found = contentType.match(/.*charset=\s*"?([^";\s]+)/i);
+  if (found === null) {
+      return undefined;
+  }
+  let charset = found[1].toLowerCase();
+  return charset;
+}
+
 browser.webRequest.onHeadersReceived.addListener(
   listener,
   {
@@ -42,106 +102,4 @@ browser.webRequest.onHeadersReceived.addListener(
     types: ["main_frame"]
   },
   ["blocking","responseHeaders"]
-);
-
-// This helper method does a few things regarding character encoding:
-// 1) Detects the charset for the TextDecoder so that bytes are properly turned into strings
-// 2) Ensures the output Content-Type is UTF-8 because that is what TextEncoder supports
-// 3) Returns the decoder/encoder pair
-function detectCharsetAndSetupDecoderEncoder(details) {
-  let contentType = '';
-  let headerIndex = -1;
-  for(let i=0; i<details.responseHeaders.length; i++) {
-      let header = details.responseHeaders[i];
-      if(header.name.toLowerCase() == "content-type") {
-          contentType = header.value.toLowerCase();
-          headerIndex = i;
-          break;
-      }
-  }
-  if (headerIndex == -1) {
-    console.log('No Content-Type header detected for '+details.url+', adding one.');
-    headerIndex = details.responseHeaders.length;
-    contentType = 'text/html';
-    details.responseHeaders.push(
-      {
-        "name": "Content-Type",
-        "value":"text/html"
-      }
-    );
-  }
-
-  let baseType;
-  if(contentType.trim().startsWith('text/html')) {
-    baseType = 'text/html';
-    console.log('Detected base type was '+baseType);
-  } else if(contentType.trim().startsWith('application/xhtml+xml')) {
-    baseType = 'application/xhtml+xml';
-    console.log('Detected base type was '+baseType);
-  } else {
-    baseType = 'text/html';
-    console.log('The Content-Type was '+contentType+', not text/html or application/xhtml+xml - results might be strange.');
-  }
-
-  // It is important to detect the charset to correctly initialize TextDecoder or
-  // else we run into garbage output sometimes.
-  // However, TextEncoder does NOT support other than 'utf-8', so it is necessary
-  // to change the Content-Type on the header to UTF-8
-  // If modifying this block of code, ensure that the tests at
-  // https://www.w3.org/2006/11/mwbp-tests/index.xhtml
-  // all pass - current implementation only fails on #9 but this detection ensures
-  // tests #3,4,5, and 8 pass.
-  let decodingCharset = 'utf-8';
-  let detectedCharset = detectCharset(contentType);
-
-  if(detectedCharset !== undefined) {
-      decodingCharset = detectedCharset;
-      console.log('Detected charset was ' + decodingCharset + ' for ' + details.url);
-  }
-  details.responseHeaders[headerIndex].value = baseType+';charset=utf-8';
-
-  let decoder = new TextDecoder(decodingCharset);
-  let encoder = new TextEncoder(); //Encoder does not support non-UTF-8 charsets so this is always utf-8.
-
-  return [decoder,encoder];
-}
-
-// Detect the charset from Content-Type
-function detectCharset(contentType) {
-  /*
-  From https://tools.ietf.org/html/rfc7231#section-3.1.1.5:
-
-  A parameter value that matches the token production can be
-  transmitted either as a token or within a quoted-string.  The quoted
-  and unquoted values are equivalent.  For example, the following
-  examples are all equivalent, but the first is preferred for
-  consistency:
-
-  text/html;charset=utf-8
-  text/html;charset=UTF-8
-  Text/HTML;Charset="utf-8"
-  text/html; charset="utf-8"
-
-  Internet media types ought to be registered with IANA according to
-  the procedures defined in [BCP13].
-
-  Note: Unlike some similar constructs in other header fields, media
-  type parameters do not allow whitespace (even "bad" whitespace)
-  around the "=" character.
-
-  ...
-
-  And regarding application/xhtml+xml, from https://tools.ietf.org/html/rfc3236#section-2
-  and the referenced links, it can be seen that charset is handled the same way with
-  respect to Content-Type.
-  */
-
-  let charsetMarker = "charset="; // Spaces *shouldn't* matter
-  let foundIndex = contentType.indexOf(charsetMarker);
-  if (foundIndex == -1) {
-      return undefined;
-  }
-  let charsetMaybeQuoted = contentType.substr(foundIndex+charsetMarker.length).trim();
-  let charset = charsetMaybeQuoted.replace(/"/g, '');
-  return charset;
-}
\ No newline at end of file
+);
\ No newline at end of file

From d518c152f1da312f1c7bbddbf9ba5d58f91eb749 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Sun, 15 Nov 2020 16:05:27 -0600
Subject: [PATCH 09/10] Streaming replacement improvements and a bit of cleanup

---
 http-response/README.md     | 13 +++++++++----
 http-response/background.js | 31 ++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/http-response/README.md b/http-response/README.md
index c8098932..be4252bc 100755
--- a/http-response/README.md
+++ b/http-response/README.md
@@ -2,16 +2,21 @@
 
 ## What it does
 
-Listens to HTTP Responses from example.com and w3.org and changes "Test" to "WebExtension Test" in the web pages' contents. 
+Listens to HTTP Responses from w3.org and changes "Test" to "WebExtension Check" in the web pages' contents. 
 
 ## What it shows
 
-A real-world example of WebRequest that shows three important details not always found in beginning examples:
- - The accumulation of data through multiple calls to `.ondata`
+A real-world example of WebRequest that shows four important details not always found in beginning examples:
+ - The accumulation of data through multiple calls to `.ondata`.
  - The decoding of binary data to text in a streaming fashion.
  - Text decoding that tries to respect the page's reported encoding via Content-Type.
+ - The encoding of replaced data back to `filter.write` in a streaming fashion.
 
-The domain w3.org is included in the list of domains to allow for testing against [this suite of standardized tests](https://www.w3.org/2006/11/mwbp-tests/index.xhtml)
+ Note that both correctly detecting the character encoding and performing streaming replacements are deeper subjects
+ than can be fully covered in a small example, but this code provides a starting point for solving these problems
+ in your own solution.
+
+The domain w3.org is included in the list of domains to allow testing against [this suite of standardized tests](https://www.w3.org/2006/11/mwbp-tests/index.xhtml)
 regarding text encoding. Tests #1-8 pass, test #9 currently fails.
 
 ## Credits
diff --git a/http-response/background.js b/http-response/background.js
index e2b1d4e4..87a3ce89 100755
--- a/http-response/background.js
+++ b/http-response/background.js
@@ -53,28 +53,40 @@ function listener(details) {
   // to UTF-8. If modifying this block of code, ensure that the tests at
   // https://www.w3.org/2006/11/mwbp-tests/index.xhtml
   // pass - current implementation only fails on #9 but this detection ensures
-  // tests #3,4,5, and 8 pass.
+  // tests #3, 4, 5, and 8 pass.
   let encoder = new TextEncoder(); 
   contentTypeHeader.value = baseType+';charset=utf-8';
 
   
   // Now the actual filtering can begin!
   let filter = browser.webRequest.filterResponseData(details.requestId);
-  let fullStr = '';
+  let unprocessedStr = '';
+  let searchString = 'Test';
+  let leaveUnprocessedLength = searchString.length - 1;
   
   filter.ondata = e => {
     // Note that the event's data may break in the middle of an encoded
     // character - the stream parameter is critical for success as this
     // method gets called multiple times.
-    let str = decoder.decode(e.data, {stream: true});
-    fullStr += str;
+    unprocessedStr += decoder.decode(e.data, {stream: true});
+    // Replace what has arrived so far. This is naive: it re-scans the held-back
+    // tail each time, which would misbehave if the replacement text could
+    // itself produce the search string.
+    unprocessedStr = unprocessedStr.replace(/Test/g, 'WebExtension Check');
+    // Emit everything except the tail, which may begin a 'Test' split across chunks.
+    if(unprocessedStr.length > leaveUnprocessedLength) {
+      let splitAt = unprocessedStr.length - leaveUnprocessedLength;
+      filter.write(encoder.encode(unprocessedStr.substr(0, splitAt)));
+      unprocessedStr = unprocessedStr.substr(splitAt);
+    }
   }
   
-  filter.onstop = async e => {
-    fullStr += decoder.decode(); //Flush the buffer
-    // Just change any instance of Test in the HTTP response to WebExtension Test.
-    let mutatedStr = fullStr.replace(/Test/g, 'WebExtension Test');
-    filter.write(encoder.encode(mutatedStr));
+  filter.onstop = async _ => {
+    // Flush the decoding buffer
+    unprocessedStr += decoder.decode();
+    // Flush our replacement buffer
+    let processedStr = unprocessedStr.replace(/Test/g, 'WebExtension Check');
+    filter.write(encoder.encode(processedStr));
     filter.close();
   }
 
@@ -95,6 +107,7 @@ function detectCharset(contentType) {
   return charset;
 }
 
+// Set up the actual webRequest hook
 browser.webRequest.onHeadersReceived.addListener(
   listener,
   {

From 6a5b6da73c81742b110f9d6abff0c8546b1af9b4 Mon Sep 17 00:00:00 2001
From: Jesse Trana <jessenkaren@gmail.com>
Date: Sun, 15 Nov 2020 16:16:36 -0600
Subject: [PATCH 10/10] Adding Rob W's example as a link in the README

---
 http-response/README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/http-response/README.md b/http-response/README.md
index be4252bc..f3c76764 100755
--- a/http-response/README.md
+++ b/http-response/README.md
@@ -19,6 +19,9 @@ A real-world example of WebRequest that shows four important details not always
 The domain w3.org is included in the list of domains to allow testing against [this suite of standardized tests](https://www.w3.org/2006/11/mwbp-tests/index.xhtml)
 regarding text encoding. Tests #1-8 pass, test #9 currently fails.
 
+For inspiration about how to make the charset detection more robust, see:
+https://github.com/Rob--W/open-in-browser/commit/a6b926ea9522b35298632e5e6a2c89ddb456c5d9
+
 ## Credits
 
 Icon is from: https://www.iconfinder.com/icons/763339/draw_edit_editor_pen_pencil_tool_write_icon#size=128