Skip to content

Commit cb8384d

Browse files
committed
indexing: generate WACZ indexes consistent with warcio.js, including the SURT key, with the POST-request query stored in the requestBody field and the request method stored in the method field
app: fix WACZ download/IPFS sharing in app; dependencies: update to wabac.js 2.6.1, warcio.js 1.4.1 and replayweb.page 1.3.9; bump to 0.5.9
1 parent e49ae7c commit cb8384d

File tree

10 files changed

+81
-66
lines changed

10 files changed

+81
-66
lines changed

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
{
22
"name": "ArchiveWeb.page",
3-
"version": "0.5.8",
3+
"version": "0.5.9",
44
"main": "index.js",
55
"description": "Create Web Archives directly in your browser",
66
"repository": "https://github.com/webrecorder/archiveweb.page",
77
"author": "Webrecorder Software",
88
"license": "AGPL-3.0-or-later",
99
"dependencies": {
1010
"@fortawesome/fontawesome-free": "^5.13.0",
11-
"@webrecorder/wabac": "github:webrecorder/wabac.js",
11+
"@webrecorder/wabac": "^2.6.1",
1212
"bulma": "^0.9.1",
1313
"flexsearch": "^0.6.32",
1414
"hash-wasm": "^4.4.1",
@@ -21,7 +21,7 @@
2121
"node-fetch": "^2.6.1",
2222
"pretty-bytes": "^5.3.0",
2323
"replaywebpage": "github:webrecorder/replayweb.page",
24-
"warcio": "^1.3.3"
24+
"warcio": "^1.4.1"
2525
},
2626
"devDependencies": {
2727
"copy-webpack-plugin": "^6.4.0",

src/downloader.js

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { Deflate } from 'pako';
66

77
import { createMD5 } from 'hash-wasm';
88

9-
import { WARCRecord, WARCSerializer } from 'warcio';
9+
import { WARCRecord, WARCSerializer, getSurt } from 'warcio';
1010

1111
import { getTSMillis, getStatusText } from '@webrecorder/wabac/src/utils';
1212

@@ -19,6 +19,8 @@ const encoder = new TextEncoder();
1919

2020
const EMPTY = new Uint8Array([]);
2121

22+
const SPLIT_REQUEST_Q_RX = /(.*?)[?&](?:__wb_method=|__wb_post=)[^&]+&(.*)/;
23+
2224
async function* getPayload(payload) {
2325
yield payload;
2426
}
@@ -294,6 +296,7 @@ class Downloader
294296
function getCDX(resource, filename, raw) {
295297

296298
const data = {
299+
url: resource.url,
297300
digest: resource.digest,
298301
mime: resource.mime,
299302
offset: resource.offset,
@@ -302,7 +305,18 @@ class Downloader
302305
status: resource.status
303306
}
304307

305-
const cdx = `${resource.url} ${resource.timestamp} ${JSON.stringify(data)}\n`;
308+
if (resource.method && resource.method !== "GET") {
309+
const m = resource.url.match(SPLIT_REQUEST_Q_RX);
310+
if (m) {
311+
data.url = m[1];
312+
data.requestBody = m[2];
313+
}
314+
data.method = resource.method;
315+
}
316+
317+
const surt = getSurt(resource.url);
318+
319+
const cdx = `${surt} ${resource.timestamp} ${JSON.stringify(data)}\n`;
306320

307321
if (!raw) {
308322
return cdx;
@@ -362,7 +376,7 @@ class Downloader
362376
}
363377

364378
if (!key) {
365-
key = resource.url + " " + resource.timestamp;
379+
key = cdx.split(" {", 1)[0];
366380
}
367381

368382
if (++count === this.linesPerBlock) {

src/recorder.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class Recorder {
193193
await this.send("Network.setCacheDisabled", {cacheDisabled: true}, sessions);
194194
await this.send("Network.setBypassServiceWorker", {bypass: true}, sessions);
195195
// another option: clear cache, but don't disable
196-
//await this.send("Network.clearBrowserCache", null, sessions);
196+
await this.send("Network.clearBrowserCache", null, sessions);
197197
} catch (e) {
198198
console.warn("Session Init Error: ");
199199
console.log(e);
@@ -935,7 +935,12 @@ class Recorder {
935935
opts.headers.delete("range");
936936
}
937937

938-
const resp = await fetch(request.url, opts);
938+
let resp = await fetch(request.url, opts);
939+
if (resp.status !== 200) {
940+
console.warn(`async fetch error ${resp.status}, retrying without headers`);
941+
resp = await fetch(request.url);
942+
}
943+
939944
const payload = await resp.arrayBuffer();
940945

941946
const reqresp = new RequestResponseInfo(fetchId);

src/requestresponseinfo.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"use strict";
22

3-
import { postToGetUrl, getStatusText } from '@webrecorder/wabac/src/utils';
3+
import { getStatusText } from '@webrecorder/wabac/src/utils';
4+
5+
import { postToGetUrl } from 'warcio';
46

57

68
// ===========================================================================
@@ -155,8 +157,8 @@ class RequestResponseInfo
155157
postData: this.postData,
156158
}
157159
if (postToGetUrl(convData)) {
160+
this.requestBody = convData.requestBody;
158161
this.url = convData.url;
159-
this.method = "GET";
160162
}
161163
}
162164

webpack.config.js

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ const BANNER = '[name].js is part of the Webrecorder Extension (https://replaywe
1515

1616
const manifest = require("./src/ext/manifest.json");
1717

18+
const defaultDefines = {
19+
__VERSION__: JSON.stringify(PACKAGE.version),
20+
__WARCIO_VERSION__: JSON.stringify(WARCIO_PACKAGE.version),
21+
__SW_NAME__: JSON.stringify("sw.js"),
22+
__IPFS_CORE_URL__: JSON.stringify(""),
23+
__IPFS_HTTP_CLIENT_URL__: JSON.stringify("")
24+
};
25+
1826

1927
const moduleSettings = {
2028
rules: [
@@ -56,10 +64,7 @@ const electronMainConfig = (env, argv) => {
5664
__filename: false,
5765
},
5866
plugins: [
59-
new webpack.DefinePlugin({
60-
__IPFS_CORE_URL__: JSON.stringify(""),
61-
__APP_FILE_SERVE_PREFIX__ : JSON.stringify(APP_FILE_SERVE_PREFIX),
62-
}),
67+
new webpack.DefinePlugin(defaultDefines),
6368
new webpack.BannerPlugin(BANNER),
6469
new CopyPlugin({
6570
patterns: [
@@ -91,12 +96,7 @@ const electronPreloadConfig = (env, argv) => {
9196
filename: '[name].js'
9297
},
9398
plugins: [
94-
// this needs to be defined, but not actually used, as electron app uses
95-
// ipfs-core from node
96-
new webpack.DefinePlugin({
97-
__IPFS_CORE_URL__: JSON.stringify(""),
98-
__APP_FILE_SERVE_PREFIX__ : JSON.stringify(APP_FILE_SERVE_PREFIX),
99-
}),
99+
new webpack.DefinePlugin(defaultDefines),
100100
]
101101
}
102102
};
@@ -124,10 +124,7 @@ const electronRendererConfig = (env, argv) => {
124124
{ from: 'src/electron/rec-window.html', to: '' },
125125
]
126126
}),
127-
new webpack.DefinePlugin({
128-
__IPFS_CORE_URL__: JSON.stringify(""),
129-
__APP_FILE_SERVE_PREFIX__ : JSON.stringify(APP_FILE_SERVE_PREFIX),
130-
})
127+
new webpack.DefinePlugin(defaultDefines),
131128
],
132129

133130
module: moduleSettings,
@@ -172,12 +169,8 @@ const extensionConfig = (env, argv) => {
172169
new MiniCssExtractPlugin(),
173170
new webpack.BannerPlugin(BANNER),
174171
new GenerateJsonPlugin('manifest.json', manifest, generateManifest, 2),
175-
new webpack.DefinePlugin({
176-
__VERSION__: JSON.stringify(PACKAGE.version),
177-
__WARCIO_VERSION__: JSON.stringify(WARCIO_PACKAGE.version),
178-
__SW_NAME__: JSON.stringify("sw.js"),
172+
new webpack.DefinePlugin({...defaultDefines,
179173
__IPFS_CORE_URL__: JSON.stringify(IPFS_CORE_URL),
180-
__IPFS_HTTP_CLIENT_URL__: JSON.stringify("")
181174
}),
182175
new CopyPlugin({
183176
patterns: [

wr-ext/bg.js

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wr-ext/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "Webrecorder ArchiveWeb.page",
33
"description": "Create high-fidelity web archives directly in your browser",
4-
"version": "0.5.8",
4+
"version": "0.5.9",
55
"content_security_policy": "script-src 'self' 'unsafe-eval'; object-src 'self'",
66
"permissions": [
77
"debugger",

wr-ext/replay/sw.js

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wr-ext/replay/ui.js

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

yarn.lock

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -808,11 +808,12 @@
808808
"@webassemblyjs/wast-parser" "1.9.0"
809809
"@xtuc/long" "4.2.2"
810810

811-
"@webrecorder/wabac@github:webrecorder/wabac.js":
812-
version "2.6.0-beta.1"
813-
resolved "https://codeload.github.com/webrecorder/wabac.js/tar.gz/cf5f5fa51a8700cbcf3d0a04b44831366558d91e"
811+
"@webrecorder/wabac@^2.6.1":
812+
version "2.6.1"
813+
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.6.1.tgz#e1e1cd5a2cd43043fbbdaa0fb49386f96bba4ccb"
814+
integrity sha512-+7sfiRQ15/Xmw4ONZXC5UKXf7v1Vr3JhjHCvdpyuAdYNcS7zPeoqj7jK2CIywzuBkeKcRDkgvyKYbXGy2FCGfQ==
814815
dependencies:
815-
"@webrecorder/wombat" "^3.1.0"
816+
"@webrecorder/wombat" "^3.1.1"
816817
brotli "github:foliojs/brotli.js"
817818
fast-xml-parser "^3.15.1"
818819
format-link-header "^3.1.1"
@@ -827,13 +828,13 @@
827828
parse5-sax-parser "^6.0.1"
828829
path-parser "^6.1.0"
829830
unescape-js "^1.1.4"
830-
warcio "^1.3.3"
831+
warcio "^1.4.1"
831832
wbn "^0.0.3"
832833

833-
"@webrecorder/wombat@^3.1.0":
834-
version "3.1.0"
835-
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.1.0.tgz#6df2e84577e2cbcf4a577ed91526723537495666"
836-
integrity sha512-4fYNahANNcWwXc8+JHZ7xtmMnESY1kZGwzgK3snRuO5/CMosNX1rMQ0TBbTGMjv0izFs3Pl4V4FGIDBkrAEHVA==
834+
"@webrecorder/wombat@^3.1.1":
835+
version "3.1.1"
836+
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.1.1.tgz#8a62e1b260e4c8e6be52a0cb70f6d8689777ed1d"
837+
integrity sha512-33LCH7L165CPx+Q/RqkQbTeD5SPI/d1OKnI9lzDo0nJu9e+zNdz+apAiCOMLXvzoFAkYzooITfvzHlHWaf+LKA==
837838

838839
"@xtuc/ieee754@^1.2.0":
839840
version "1.2.0"
@@ -8361,7 +8362,7 @@ repeating@^2.0.0:
83618362

83628363
"replaywebpage@github:webrecorder/replayweb.page":
83638364
version "1.3.9"
8364-
resolved "https://codeload.github.com/webrecorder/replayweb.page/tar.gz/007da4b52cae4dea300663d15e5c45c9bae3bd4d"
8365+
resolved "https://codeload.github.com/webrecorder/replayweb.page/tar.gz/746d0e9a79fde6eaa090270c001b3641ea768716"
83658366
dependencies:
83668367
"@fortawesome/fontawesome-free" "^5.13.0"
83678368
"@webrecorder/wabac" "github:webrecorder/wabac.js"
@@ -10013,10 +10014,10 @@ vm-browserify@^1.0.1:
1001310014
resolved "https://registry.yarnpkg.com/vm-browserify/-/vm-browserify-1.1.2.tgz#78641c488b8e6ca91a75f511e7a3b32a86e5dda0"
1001410015
integrity sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==
1001510016

10016-
warcio@^1.3.3:
10017-
version "1.3.3"
10018-
resolved "https://registry.yarnpkg.com/warcio/-/warcio-1.3.3.tgz#d215223e6ea2cf1fbaea645f8dd34b9430c258a8"
10019-
integrity sha512-Qr0Uw/tqZXzDBwT3BmfHuFu90jMT0Mc/JzZMhCdXyqdihVtgOZqtvwj+JcDBBmBDrpIccabi/XRXVbAGbE9U+Q==
10017+
warcio@^1.4.1:
10018+
version "1.4.1"
10019+
resolved "https://registry.yarnpkg.com/warcio/-/warcio-1.4.1.tgz#6d289f37044eda58e369db2bed18cdaf6f9f7beb"
10020+
integrity sha512-4Kue2T+Zt93L/CyBSFh3sHF7NgR+DER5FQHoU0uGBR6abcqBqQ+mE22OE6fpjSReZMx2jIeiAQLD8XQzPq3P3g==
1002010021
dependencies:
1002110022
"@peculiar/webcrypto" "^1.1.1"
1002210023
esm "^3.2.25"

0 commit comments

Comments
 (0)