Skip to content

Commit

Permalink
fix #85 - Empty Cdata (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
justinwilaby authored Dec 25, 2024
1 parent 6f0a0fe commit 61e271d
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 65 deletions.
19 changes: 0 additions & 19 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,5 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in library 'sax-wasm'",
"cargo": {
"args": [
"test",
"--no-run",
"--lib",
"--package=sax-wasm"
],
"filter": {
"name": "sax-wasm",
"kind": "lib"
}
},
"args": [],
"cwd": "${workspaceFolder}"
}
]
}
8 changes: 2 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ const saxPath = path.resolve(__dirname, 'node_modules/sax-wasm/lib/sax-wasm.wasm
const saxWasmBuffer = fs.readFileSync(saxPath);

// Instantiate
const options = { highWaterMark: 32 * 1024 }; // 32k chunks
const parser = new SAXParser(SaxEventType.Attribute | SaxEventType.OpenTag, options);
const parser = new SAXParser(SaxEventType.Attribute | SaxEventType.OpenTag);

// Instantiate and prepare the wasm for parsing
const ready = await parser.prepareWasm(saxWasmBuffer);
Expand Down Expand Up @@ -79,8 +78,7 @@ import { SaxEventType, SAXParser } from 'sax-wasm';
const response = fetch('path/to/sax-wasm.wasm');

// Instantiate
const options = { highWaterMark: 32 * 1024 }; // 32k chunks
const parser = new SAXParser(SaxEventType.Attribute | SaxEventType.OpenTag, options);
const parser = new SAXParser(SaxEventType.Attribute | SaxEventType.OpenTag);

// Instantiate and prepare the wasm for parsing
const ready = await parser.prepareWasm(response);
Expand Down Expand Up @@ -164,8 +162,6 @@ Constructs new SaxParser instance with the specified events bitmask and options
### Parameters
- `events` - A number representing a bitmask of events that should be reported by the parser.
- `options` - When specified, the `highWaterMark` option is used to prepare the parser for the expected size of each chunk
provided by the stream. The parser will throw if chunks written to it are larger.
### Methods
Expand Down
6 changes: 1 addition & 5 deletions lib/cjs/saxWasm.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ interface WasmSaxParser extends WebAssembly.Exports {
write: (pointer: number, length: number) => void;
end: () => void;
}
export interface SaxParserOptions {
highWaterMark: number;
}
type TextDecoder = {
decode: (input?: ArrayBufferView | ArrayBuffer, options?: {
stream?: boolean;
Expand All @@ -94,9 +91,8 @@ export declare class SAXParser {
events?: number;
wasmSaxParser?: WasmSaxParser;
eventHandler?: (type: SaxEventType, detail: Detail) => void;
private readonly options;
private writeBuffer?;
constructor(events?: number, options?: SaxParserOptions);
constructor(events?: number);
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
write(chunk: Uint8Array): void;
end(): void;
Expand Down
6 changes: 2 additions & 4 deletions lib/cjs/saxWasm.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/cjs/saxWasm.js.map

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions lib/esm/saxWasm.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ interface WasmSaxParser extends WebAssembly.Exports {
write: (pointer: number, length: number) => void;
end: () => void;
}
export interface SaxParserOptions {
highWaterMark: number;
}
type TextDecoder = {
decode: (input?: ArrayBufferView | ArrayBuffer, options?: {
stream?: boolean;
Expand All @@ -94,9 +91,8 @@ export declare class SAXParser {
events?: number;
wasmSaxParser?: WasmSaxParser;
eventHandler?: (type: SaxEventType, detail: Detail) => void;
private readonly options;
private writeBuffer?;
constructor(events?: number, options?: SaxParserOptions);
constructor(events?: number);
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
write(chunk: Uint8Array): void;
end(): void;
Expand Down
6 changes: 2 additions & 4 deletions lib/esm/saxWasm.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/esm/saxWasm.js.map

Large diffs are not rendered by default.

Binary file modified lib/sax-wasm.wasm
Binary file not shown.
8 changes: 2 additions & 6 deletions lib/saxWasm.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ interface WasmSaxParser extends WebAssembly.Exports {
write: (pointer: number, length: number) => void;
end: () => void;
}
export interface SaxParserOptions {
highWaterMark: number;
}
type TextDecoder = {
decode: (input?: ArrayBufferView | ArrayBuffer, options?: {
stream?: boolean;
Expand All @@ -94,10 +91,9 @@ export declare class SAXParser {
events?: number;
wasmSaxParser?: WasmSaxParser;
eventHandler?: (type: SaxEventType, detail: Detail) => void;
private readonly options;
private writeBuffer?;
constructor(events?: number, options?: SaxParserOptions);
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[number, Detail]>;
constructor(events?: number);
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
write(chunk: Uint8Array): void;
end(): void;
prepareWasm(source: Response | Promise<Response>): Promise<boolean>;
Expand Down
2 changes: 1 addition & 1 deletion src/js/__test__/benchmark.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { Readable } from 'node:stream';
async function benchmarkSaxWasmParser() {
const saxWasm = readFileSync(resolve(new URL('../../../lib/sax-wasm.wasm', import.meta.url).pathname));

const parser = new SAXParser(SaxEventType.OpenTag, {highWaterMark: 64 * 1024});
const parser = new SAXParser(SaxEventType.OpenTag);
parser.eventHandler = () => void 0;
await parser.prepareWasm(saxWasm);

Expand Down
16 changes: 16 additions & 0 deletions src/js/__test__/cdada.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,20 @@ describe('When parsing XML, the SaxWasm', () => {
deepStrictEqual(JSON.parse(JSON.stringify(end)), { line: 0, character: 82 });
strictEqual(value, ' did you know "x < y" & "z > y"? so I guess that means that z > x ');
});

// An empty CDATA section must be reported with an empty string value, and the
// non-empty CDATA section that follows it must still be reported with its text.
it("should support empty cdata", () => {
  const xml = Buffer.from(`<div>
<div>
<![CDATA[]]>
</div>
<div>
<![CDATA[something]]>
</div>
</div>`);
  parser.write(xml);

  // The first two collected entries are compared, in document order.
  const [emptyCdata, filledCdata] = _data;
  strictEqual(emptyCdata.value, "");
  strictEqual(filledCdata.value, "something");
});
});
4 changes: 2 additions & 2 deletions src/js/__test__/comment.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Attribute, SaxEventType, SAXParser } from '../saxWasm'
import { Attribute, Detail, SaxEventType, SAXParser } from '../saxWasm'
import { readFileSync } from 'fs';
import { resolve } from 'path';
import { deepStrictEqual, strictEqual } from 'assert';
Expand All @@ -13,7 +13,7 @@ describe('SaxWasm', () => {
beforeAll(async () => {
parser = new SAXParser(SaxEventType.Comment | SaxEventType.Attribute | SaxEventType.OpenTag);

parser.eventHandler = function (event: SaxEventType, data: Attribute) {
parser.eventHandler = function (event: SaxEventType, data: Detail) {
_event = event;
_data.push(data as Attribute);
};
Expand Down
7 changes: 1 addition & 6 deletions src/js/saxWasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,6 @@ interface WasmSaxParser extends WebAssembly.Exports {
end: () => void;
}

export interface SaxParserOptions {
highWaterMark: number;
}

type TextDecoder = { decode: (input?: ArrayBufferView | ArrayBuffer, options?: { stream?: boolean }) => string };

Expand All @@ -233,11 +230,9 @@ export class SAXParser {
public wasmSaxParser?: WasmSaxParser;

public eventHandler?: (type: SaxEventType, detail: Detail) => void;
private readonly options: SaxParserOptions;
private writeBuffer?: Uint8Array;

constructor(events = 0, options: SaxParserOptions = { highWaterMark: 32 * 1024 }) {
this.options = options;
constructor(events = 0) {
const self = this;
Object.defineProperties(this, {
events: {
Expand Down
32 changes: 27 additions & 5 deletions src/sax/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ impl SAXParser {
}
// Store these only if we're interested in CloseTag events
if len != 0 && self.events & Event::CloseTag as u32 != 0 {
self.tags[len - 1].text_nodes.push(text);
}
self.tags[len - 1].text_nodes.push(text);
}
}
self.new_tag();
}
Expand All @@ -250,8 +250,13 @@ impl SAXParser {
fn sgml_decl(&mut self, grapheme: &str) {
let is_sgml_char = match &self.sgml_decl.value as &str {
sgml if ascii_icompare("[cdata[", sgml) == true => {
self.state = State::Cdata;
self.cdata.value.push_str(grapheme);
// Empty cdata
if grapheme == "]" {
self.state = State::CdataEnding;
} else {
self.state = State::Cdata;
self.cdata.value.push_str(grapheme);
}
self.cdata.start = (self.line, self.character - 8);
false
}
Expand Down Expand Up @@ -403,7 +408,7 @@ impl SAXParser {
}

fn cdata_ending_2(&mut self, grapheme: &str) {
if grapheme == ">" && self.cdata.value.len() != 0 {
if grapheme == ">" {
self.new_text();
if self.events & Event::Cdata as u32 != 0 {
let mut cdata = mem::replace(&mut self.cdata, Text::new((0, 0)));
Expand Down Expand Up @@ -872,6 +877,23 @@ mod tests {
sax.events = Event::Text as u32;
let str = "<foo>{bar < baz ? <div></div> : <></>}</foo>";

sax.write(str.as_bytes());
Ok(())
}
/// Smoke test for empty CDATA sections.
///
/// Writes a document holding an empty `<![CDATA[]]>` followed by a non-empty
/// one, with only `Event::Cdata` enabled. Nothing is asserted about the
/// emitted events; the test only requires that `write` completes.
#[test]
fn parse_empty_cdata() -> Result<()> {
    let document = "<div>
<div>
<![CDATA[]]>
</div>
<div>
<![CDATA[something]]>
</div>
</div>";

    // The handler ignores everything it receives.
    let noop_handler = |_event: Event, _data: Entity| {};
    let mut parser = SAXParser::new(noop_handler);
    parser.events = Event::Cdata as u32;
    parser.write(document.as_bytes());

    Ok(())
}
Expand Down

0 comments on commit 61e271d

Please sign in to comment.