Skip to content

Commit 4ae8d27

Browse files
committed
fix(websocket): Follow the description making algorithm in Cosense
1 parent b7feaf9 commit 4ae8d27

File tree

4 files changed

+86
-72
lines changed

4 files changed

+86
-72
lines changed

websocket/__snapshots__/findMetadata.test.ts.snap

Lines changed: 0 additions & 33 deletions
This file was deleted.

websocket/findMetadata.test.ts

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts";
2-
import { assertEquals } from "@std/assert";
3-
import { assertSnapshot } from "@std/testing/snapshot";
1+
import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts";
2+
import { assertEquals } from "@std/assert/equals";
43

54
// Test data for metadata extraction from a Scrapbox page
65
// This sample includes various Scrapbox syntax elements:
@@ -38,8 +37,45 @@ Prepare thumbnail
3837
3938
[https://scrapbox.io/files/65e7f4413bc95600258481fb.svg https://scrapbox.io/files/65e7f82e03949c0024a367d0.svg]`;
4039

41-
// Test findMetadata function's ability to extract various metadata from a page
42-
Deno.test("findMetadata()", (t) => assertSnapshot(t, getPageMetadataFromLines(text)));
40+
Deno.test("getPageMetadataFromLines()", () => {
41+
assertEquals(getPageMetadataFromLines(text), [
42+
"test page",
43+
[
44+
"normal",
45+
"link2",
46+
"hashtag",
47+
],
48+
[
49+
"/help-en/external-link",
50+
],
51+
[
52+
"scrapbox",
53+
"takker",
54+
],
55+
"https://scrapbox.io/files/65f29c24974fd8002333b160.svg",
56+
[
57+
"[normal]link",
58+
"but `this [link]` is not a link",
59+
"`Links [link] and images [https://scrapbox.io/files/65f29c0c9045b5002522c8bb.svg] in code blocks should be ignored`",
60+
"`? Need help with setup!!`",
61+
"#hashtag is recommended",
62+
],
63+
[
64+
"65f29c24974fd8002333b160",
65+
"65e7f82e03949c0024a367d0",
66+
"65e7f4413bc95600258481fb",
67+
],
68+
[
69+
"Need help with setup!!",
70+
],
71+
[
72+
"Name\t[scrapbox.icon]",
73+
"Address\tAdd [link2] here",
74+
"Phone\tAdding # won't create a link",
75+
"Strengths\tList about 3 items",
76+
],
77+
]);
78+
});
4379

4480
// Test Helpfeel extraction (lines starting with "?")
4581
// These are used for collecting questions and help requests in Scrapbox

websocket/findMetadata.ts

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,17 @@ import { parseYoutube } from "../parser/youtube.ts";
2121
export const getPageMetadataFromLines = (
2222
text: string,
2323
): [
24-
string[],
25-
string[],
26-
string[],
27-
string | null,
28-
string[],
29-
string[],
30-
string[],
24+
title: string,
25+
links: string[],
26+
projectLinks: string[],
27+
icons: string[],
28+
image: string | null,
29+
descriptions: string[],
30+
files: string[],
31+
helpfeels: string[],
32+
infoboxDefinition: string[],
3133
] => {
32-
const blocks = parse(text, { hasTitle: true }).flatMap((block) => {
33-
switch (block.type) {
34-
case "codeBlock":
35-
case "title":
36-
return [];
37-
case "line":
38-
case "table":
39-
return block;
40-
}
41-
});
34+
const blocks = parse(text, { hasTitle: true });
4235

4336
/** Map for detecting duplicate links while preserving link type information
4437
*
@@ -49,13 +42,15 @@ export const getPageMetadataFromLines = (
4942
* When the same page is referenced by both formats,
5043
* we prioritize the bracket link format in the final output
5144
*/
45+
let title = "";
5246
const linksLc = new Map<string, boolean>();
5347
const links = [] as string[];
5448
const projectLinksLc = new Set<string>();
5549
const projectLinks = [] as string[];
5650
const iconsLc = new Set<string>();
5751
const icons = [] as string[];
5852
let image: string | null = null;
53+
const descriptions = [] as string[];
5954
const files = new Set<string>();
6055
const helpfeels = new Set<string>();
6156

@@ -150,11 +145,31 @@ export const getPageMetadataFromLines = (
150145

151146
for (const block of blocks) {
152147
switch (block.type) {
148+
case "title": {
149+
title = block.text;
150+
continue;
151+
}
153152
case "line":
153+
if (descriptions.length < 5 && block.nodes.length > 0) {
154+
descriptions.push(
155+
block.nodes[0].type === "helpfeel" ||
156+
block.nodes[0].type === "commandLine"
157+
? makeInlineCodeForDescription(block.nodes[0].raw)
158+
: block.nodes.map((node) => node.raw).join("").trim().slice(
159+
0,
160+
200,
161+
),
162+
);
163+
}
154164
for (const node of block.nodes) {
155165
lookup(node);
156166
}
157167
continue;
168+
case "codeBlock":
169+
if (descriptions.length < 5) {
170+
descriptions.push(makeInlineCodeForDescription(block.content));
171+
}
172+
continue;
158173
case "table": {
159174
for (const row of block.cells) {
160175
for (const nodes of row) {
@@ -176,16 +191,21 @@ export const getPageMetadataFromLines = (
176191
}
177192

178193
return [
194+
title,
179195
links,
180196
projectLinks,
181197
icons,
182198
image,
199+
descriptions,
183200
[...files],
184201
[...helpfeels],
185202
infoboxDefinition,
186203
];
187204
};
188205

206+
const makeInlineCodeForDescription = (text: string): `\`${string}\`` =>
207+
`\`${text.trim().replaceAll("`", "\\`").slice(0, 198)}\``;
208+
189209
const cutId = (link: string): string => link.replace(/#[a-f\d]{24,32}$/, "");
190210

191211
/** Extract Helpfeel entries from text

websocket/makeChanges.ts

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { diffToChanges } from "./diffToChanges.ts";
22
import type { Page } from "@cosense/types/rest";
33
import type { Change } from "./change.ts";
4-
import { getPageMetadataFromLines, getHelpfeels } from "./findMetadata.ts";
4+
import { getHelpfeels, getPageMetadataFromLines } from "./findMetadata.ts";
55
import { isSameArray } from "./isSameArray.ts";
66
import { isString } from "@core/unknownutil/is/string";
77

@@ -22,22 +22,6 @@ export function* makeChanges(
2222
yield change;
2323
}
2424

25-
// Handle title changes
26-
// Note: We always include title change commits for new pages (`persistent === false`)
27-
// to ensure proper page initialization
28-
if (before.lines[0].text !== after_[0] || !before.persistent) {
29-
yield { title: after_[0] };
30-
}
31-
32-
// Process changes in page descriptions
33-
// Descriptions are the first 5 lines after the title (lines 1-5)
34-
// These lines provide a summary or additional context for the page
35-
const leftDescriptions = before.lines.slice(1, 6).map((line) => line.text);
36-
const rightDescriptions = after_.slice(1, 6);
37-
if (leftDescriptions.join("") !== rightDescriptions.join("")) {
38-
yield { descriptions: rightDescriptions };
39-
}
40-
4125
// Process changes in various metadata
4226
// Metadata includes:
4327
// - links: References to other pages
@@ -48,18 +32,25 @@ export function* makeChanges(
4832
// - helpfeels: Questions or help requests (lines starting with "?")
4933
// - infoboxDefinition: Structured data definitions
5034
const [
35+
title,
5136
links,
5237
projectLinks,
5338
icons,
5439
image,
40+
descriptions,
5541
files,
5642
helpfeels,
5743
infoboxDefinition,
5844
] = getPageMetadataFromLines(after_.join("\n"));
45+
// Handle title changes
46+
// Note: We always include title change commits for new pages (`persistent === false`)
47+
// to ensure proper page initialization
48+
if (before.title !== title || !before.persistent) yield { title };
5949
if (!isSameArray(before.links, links)) yield { links };
6050
if (!isSameArray(before.projectLinks, projectLinks)) yield { projectLinks };
6151
if (!isSameArray(before.icons, icons)) yield { icons };
6252
if (before.image !== image) yield { image };
53+
if (!isSameArray(before.descriptions, descriptions)) yield { descriptions };
6354
if (!isSameArray(before.files, files)) yield { files };
6455
if (!isSameArray(getHelpfeels(before.lines), helpfeels)) yield { helpfeels };
6556
if (!isSameArray(before.infoboxDefinition, infoboxDefinition)) {

0 commit comments

Comments
 (0)