Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/tdev/docx-grader/extractors/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Public entry point of the extractors package: re-exports each extractor's
// default export under a descriptive name.
export { default as textExtractor } from './text';
export { default as languageExtractor } from './language';
38 changes: 38 additions & 0 deletions packages/tdev/docx-grader/extractors/language.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { Node, visit, Visitor } from '..';
import { default as extractText } from './text';

/**
 * One entry produced by the language extractor: a language value paired with
 * the text extracted from the node that contains the matching `w:rPr`.
 */
interface LanguagePart {
/** Value of the `w:val` attribute read from the `w:lang` element. */
lang: string;
/** Text extracted from the parent of the `w:rPr` node (empty when there is no parent). */
text: string;
}

/**
 * Reads the `w:val` attribute of the `w:lang` child of a run-properties node.
 *
 * @param rPr The candidate `w:rPr` node (or a plain string, which never carries a language).
 * @returns The `w:val` attribute value, or `undefined` when `rPr` is a string,
 *          has no single object-valued `w:lang` child, or that child has no
 *          `attributes` object.
 */
const getLang = (rPr: Node | string) => {
    if (typeof rPr === 'string') {
        return undefined;
    }
    const langNode = rPr['w:lang'];
    // Only a single (non-array) object-valued `w:lang` entry is considered.
    if (typeof langNode !== 'object' || Array.isArray(langNode)) {
        return undefined;
    }
    const attrs = langNode.attributes;
    return typeof attrs === 'object' ? attrs['w:val'] : undefined;
};

/**
 * Returns the text associated with the node that owns a `w:rPr` element.
 *
 * The paragraph-based text extractor only collects `w:t` elements located
 * inside `w:p` descendants of the node it is given. The owner of a `w:rPr`
 * normally has no `w:p` descendants, so that extractor yields an empty string
 * for it. In that case the `w:t` elements below the container are collected
 * directly and concatenated.
 */
const collectContainerText = (container: Node): string => {
    const paragraphText = extractText(container);
    if (paragraphText !== '') {
        // The container holds whole paragraphs: keep the paragraph-joined output.
        return paragraphText;
    }
    const pieces: string[] = [];
    visit(
        container,
        (name) => name === 'w:t',
        (_name, textNode) => {
            const value = textNode['#text'];
            if (value !== undefined) {
                pieces.push(String(value));
            }
            return 'skipChildren';
        }
    );
    return pieces.join('');
};

/**
 * Walks the tree below `node` and collects one {@link LanguagePart} for every
 * `w:rPr` element that declares a language through `w:lang`'s `w:val`.
 *
 * @param node Root of the parsed XML tree to search.
 * @returns The language values in traversal order, each paired with the text
 *          of the node containing the `w:rPr` (empty when there is no parent).
 *          Non-string language values are ignored.
 */
const extractor = (node: Node) => {
    const languageParts: LanguagePart[] = [];
    visit(
        node,
        (name, candidate) => name === 'w:rPr' && getLang(candidate) !== undefined,
        (_name, rPr, _index, parent) => {
            const lang = getLang(rPr);
            if (typeof lang === 'string') {
                const text = parent ? collectContainerText(parent.node) : '';
                languageParts.push({ lang, text });
            }
            return 'continue';
        }
    );
    return languageParts;
};

export default extractor;
27 changes: 27 additions & 0 deletions packages/tdev/docx-grader/extractors/text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { Node, visit } from '..';

/**
 * Extracts the plain text below `node`.
 *
 * Every `w:p` element found during traversal contributes one line made of the
 * concatenated `#text` values of the `w:t` elements beneath it; the lines are
 * joined with `\n`.
 *
 * @param node Root of the parsed XML tree to read.
 * @returns The collected text, one line per visited `w:p` element.
 */
const extractor = (node: Node) => {
    const isParagraph = (tag: string) => tag === 'w:p';
    const isTextElement = (tag: string) => tag === 'w:t';

    const lines: string[] = [];
    visit(node, isParagraph, (_tag, paragraph) => {
        const fragments: string[] = [];
        visit(paragraph, isTextElement, (_innerTag, textElement) => {
            const value = textElement['#text'];
            if (value !== undefined) {
                fragments.push(value);
            }
            // Nothing below a text element is of interest.
            return 'skipChildren';
        });
        lines.push(fragments.join(''));
        return 'continue';
    });
    return lines.join('\n');
};

export default extractor;
66 changes: 66 additions & 0 deletions packages/tdev/docx-grader/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/** Attribute values of a parsed XML element, keyed by attribute name. */
interface Attribute {
    [key: string]: string | number | undefined;
}

/**
 * A parsed XML element. Besides the reserved keys `attributes`, `#text` and
 * `parent`, every other key is a child element name mapping to a single child
 * node or to an array of child nodes.
 */
export interface Node {
    attributes?: Attribute;
    ['#text']?: string;
    parent?: Node;
    [key: string]: Node | Node[] | string | number | Attribute | undefined;
}

/**
 * Result of a {@link Visitor} call.
 * - `'continue'`: keep traversing, including the children of the visited node.
 * - `'skipChildren'`: do not descend into the visited node, continue with its siblings.
 * - `'break'`: stop the whole traversal.
 * - `'skip'`: currently handled exactly like `'continue'`.
 */
export type Action = 'continue' | 'break' | 'skip' | 'skipChildren';

/** Decides whether the visitor should be called for a node. */
export type Tester = (name: string, node: Node) => boolean;

/**
 * Callback invoked for every node accepted by the tester.
 * `index` is the position inside the parent's array for repeated elements and
 * `-1` otherwise; `parent` is absent for the root node.
 */
export type Visitor = (
    name: string,
    node: Node,
    index: number,
    parent?: { name: string; node: Node }
) => Action;

/**
 * Depth-first, pre-order traversal of a parsed XML tree.
 *
 * The root is visited under the name `'root'`. The keys `attributes`, `#text`
 * and `parent` are never treated as children.
 *
 * @param node  Root of the tree to traverse.
 * @param test  Predicate selecting the nodes passed to the visitor.
 * @param visit Visitor called for every selected node; its result steers the traversal.
 * @returns `'break'` when a visitor aborted the traversal, `'continue'` otherwise.
 */
export const visit = (node: Node, test: Tester, visit: Visitor) => {
    const visitNode = (
        name: string,
        node: Node,
        index: number,
        parent?: { name: string; node: Node }
    ): Action => {
        if (test(name, node)) {
            const action = visit(name, node, index, parent);
            if (action === 'break') {
                return 'break';
            }
            if (action === 'skipChildren') {
                return 'continue';
            }
        }
        // Describes the current node for its children: its own name and itself.
        const parentRef = { name, node };
        // Plain loops (not forEach) so that 'break' really aborts the traversal.
        for (const childName of Object.keys(node)) {
            if (childName === 'attributes' || childName === '#text' || childName === 'parent') {
                continue;
            }
            const child = node[childName];
            if (Array.isArray(child)) {
                for (let i = 0; i < child.length; i++) {
                    if (visitNode(childName, child[i], i, parentRef) === 'break') {
                        return 'break';
                    }
                }
            } else if (typeof child === 'object' && child !== null) {
                if (visitNode(childName, child, -1, parentRef) === 'break') {
                    return 'break';
                }
            }
        }
        return 'continue';
    };

    return visitNode('root', node, -1);
};
18 changes: 18 additions & 0 deletions packages/tdev/docx-grader/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "@tdev/docx-grader",
"version": "1.0.0",
"main": "index.ts",
"types": "index.ts",
"dependencies": {
"fast-xml-parser": "^5.2.5",
"fflate": "^0.8.2"
},
"devDependencies": {
"vitest": "*",
"@docusaurus/module-type-aliases": "*",
"@docusaurus/core": "*"
},
"peerDependencies": {
"@tdev/core": "1.0.0"
}
}
3 changes: 3 additions & 0 deletions packages/tdev/docx-grader/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"extends": "../../../tsconfig.json"
}
155 changes: 155 additions & 0 deletions src/pages/docx/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import React from 'react';
import Layout from '@theme/Layout';
import { unzip } from 'fflate';
import { XMLParser } from 'fast-xml-parser';
import CodeBlock from '@theme/CodeBlock';
import { visit } from '@tdev/docx-grader';
import { languageExtractor, textExtractor } from '@tdev/docx-grader/extractors';

/**
 * Reads a user-selected file and unzips it in memory.
 *
 * @param file The archive selected by the user.
 * @returns A promise resolving to the archive entries keyed by their path. It
 *          rejects when the file cannot be read, the read is aborted, or the
 *          archive cannot be unzipped.
 */
const unzipFile = (file: File): Promise<{ [path: string]: Uint8Array }> => {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => {
            try {
                const buffer = new Uint8Array(reader.result as ArrayBuffer);
                unzip(buffer, (err, unzipped) => {
                    if (err) {
                        reject(err);
                    } else {
                        resolve(unzipped);
                    }
                });
            } catch (err) {
                // A synchronous failure must not leave the promise pending.
                reject(err);
            }
        };
        // Without these handlers a failed or aborted read would never settle the promise.
        reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
        reader.onabort = () => reject(new Error('Reading the file was aborted'));
        reader.readAsArrayBuffer(file);
    });
};

/**
 * Parses every `.xml` / `.rels` entry of an unzipped archive into an object tree.
 *
 * Attributes are grouped under `attributes` and element text is stored under
 * `#text`. Entries that fail to parse are logged and left out of the result;
 * all other entries are passed through unchanged as `Uint8Array`.
 *
 * @param unzipped Archive entries keyed by path.
 * @returns The parsed trees (or raw bytes for non-XML entries) keyed by path.
 */
const parseXmlFiles = (unzipped: { [path: string]: Uint8Array }) => {
    const parser = new XMLParser({
        ignoreAttributes: false,
        attributeNamePrefix: '',
        attributesGroupName: 'attributes',
        textNodeName: '#text',
        alwaysCreateTextNode: true,
        parseAttributeValue: true,
        // Element text must stay a string: numeric parsing would rewrite
        // document text such as "007" or "1.50".
        parseTagValue: false,
        trimValues: false,
        allowBooleanAttributes: true
    });
    // One decoder is enough for all entries.
    const decoder = new TextDecoder('utf-8');

    const xmlFiles: { [path: string]: any } = {};

    for (const [path, data] of Object.entries(unzipped)) {
        if (path.endsWith('.xml') || path.endsWith('.rels')) {
            try {
                xmlFiles[path] = parser.parse(decoder.decode(data));
            } catch (err) {
                console.warn(`Failed to parse XML file ${path}:`, err);
            }
        } else {
            xmlFiles[path] = data; // Keep non-XML files as Uint8Array
        }
    }

    return xmlFiles;
};

/**
 * Page that lets the user upload a `.docx` / `.odt` archive, parses the XML
 * files it contains and shows the extracted plain text together with the
 * parsed tree of the selected file.
 */
export default function Home(): React.ReactNode {
    const [parsedFiles, setParsedFiles] = React.useState<{ [path: string]: any } | null>(null);
    const [loading, setLoading] = React.useState(false);
    const [error, setError] = React.useState<string | null>(null);
    const [shownFile, setShownFiles] = React.useState<string>('');
    const [plainText, setPlainText] = React.useState<string>('');

    const handleFileUpload = async (event: React.ChangeEvent<HTMLInputElement>) => {
        const file = event.target.files?.[0];
        if (!file) return;

        // Compare case-insensitively so that e.g. "REPORT.DOCX" is accepted too.
        const fileName = file.name.toLowerCase();
        if (!fileName.endsWith('.docx') && !fileName.endsWith('.odt')) {
            setError('Please select a .docx or .odt file');
            return;
        }

        setLoading(true);
        setError(null);

        try {
            const unzipped = await unzipFile(file);
            const parsed = parseXmlFiles(unzipped);
            // Word documents keep their body in word/document.xml, OpenDocument
            // files in content.xml; show whichever one the archive contains.
            const mainFile =
                ['word/document.xml', 'content.xml'].find((path) => parsed[path] !== undefined) ??
                'word/document.xml';
            setParsedFiles(parsed);
            setShownFiles(mainFile);
        } catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            setError('Failed to process file: ' + message);
        } finally {
            setLoading(false);
        }
    };

    // Recompute the plain text whenever the selected file OR the uploaded
    // document changes; otherwise a second upload would keep the stale text
    // because the shown path often stays the same.
    React.useEffect(() => {
        const parsed = parsedFiles?.[shownFile];
        if (!parsed) {
            setPlainText('');
            return;
        }
        setPlainText(textExtractor(parsed));
        // The language information is currently only logged, not rendered.
        console.log(languageExtractor(parsed));
    }, [parsedFiles, shownFile]);

    return (
        <Layout description="DOCX">
            <main>
                <h1>DOCX</h1>
                <div style={{ padding: '20px', margin: '0 20px' }}>
                    <h1>Word Document Reader</h1>

                    <div style={{ marginBottom: '20px' }}>
                        <input
                            type="file"
                            accept=".docx,.odt"
                            onChange={handleFileUpload}
                            disabled={loading}
                        />
                    </div>

                    {loading && <p>Loading document...</p>}

                    {error && <div style={{ color: 'red', marginBottom: '20px' }}>{error}</div>}

                    {parsedFiles && (
                        <div style={{ border: '1px solid #ccc', padding: '15px', borderRadius: '5px' }}>
                            <h3>Document Parsed Successfully</h3>
                            <p>Found {Object.keys(parsedFiles).length} files</p>
                            <details>
                                <summary>File Structure</summary>
                                {Object.keys(parsedFiles)
                                    .filter((f) => f.endsWith('xml'))
                                    .map((filePath) => (
                                        <div key={filePath} style={{ margin: '5px 0' }}>
                                            <button
                                                style={{
                                                    background: 'none',
                                                    border: 'none',
                                                    color: shownFile === filePath ? 'blue' : 'black',
                                                    cursor: 'pointer',
                                                    textDecoration:
                                                        shownFile === filePath ? 'underline' : 'none'
                                                }}
                                                onClick={() => setShownFiles(filePath)}
                                            >
                                                {filePath}
                                            </button>
                                        </div>
                                    ))}
                            </details>
                            <CodeBlock language="text" title="Plain Text">
                                {plainText}
                            </CodeBlock>
                            {parsedFiles[shownFile] && (
                                <CodeBlock language="json" title={shownFile}>
                                    {JSON.stringify(parsedFiles[shownFile] || {}, null, 2)}
                                </CodeBlock>
                            )}
                        </div>
                    )}
                </div>
            </main>
        </Layout>
    );
}
17 changes: 17 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8444,6 +8444,13 @@ fast-uri@^3.0.1:
resolved "https://registry.yarnpkg.com/fast-uri/-/fast-uri-3.0.6.tgz#88f130b77cfaea2378d56bf970dea21257a68748"
integrity sha512-Atfo14OibSv5wAp4VWNsFYE1AchQRTv9cBGWET4pZWHzYshFSS9NQI6I57rdKn9croWVMbYFbLhJ+yJvmZIIHw==

fast-xml-parser@^5.2.5:
version "5.2.5"
resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-5.2.5.tgz#4809fdfb1310494e341098c25cb1341a01a9144a"
integrity sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ==
dependencies:
strnum "^2.1.0"

fastq@^1.6.0:
version "1.19.1"
resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.19.1.tgz#d50eaba803c8846a883c16492821ebcd2cda55f5"
Expand Down Expand Up @@ -8477,6 +8484,11 @@ feed@^4.2.2:
dependencies:
xml-js "^1.6.11"

fflate@^0.8.2:
version "0.8.2"
resolved "https://registry.yarnpkg.com/fflate/-/fflate-0.8.2.tgz#fc8631f5347812ad6028bbe4a2308b2792aa1dea"
integrity sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==

figures@^3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af"
Expand Down Expand Up @@ -13943,6 +13955,11 @@ strip-literal@^3.0.0:
dependencies:
js-tokens "^9.0.1"

strnum@^2.1.0:
version "2.1.1"
resolved "https://registry.yarnpkg.com/strnum/-/strnum-2.1.1.tgz#cf2a6e0cf903728b8b2c4b971b7e36b4e82d46ab"
integrity sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw==

style-mod@^4.0.0, style-mod@^4.1.0:
version "4.1.2"
resolved "https://registry.yarnpkg.com/style-mod/-/style-mod-4.1.2.tgz#ca238a1ad4786520f7515a8539d5a63691d7bf67"
Expand Down