Skip to content

Commit 0ada883

Browse files
committed
fix(codemod-sandbox): fix WASM byte offset handling
1 parent a94dcc4 commit 0ada883

File tree

2 files changed

+43
-12
lines changed

2 files changed

+43
-12
lines changed

crates/codemod-sandbox/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@codemod.com/codemod-sandbox",
3-
"version": "0.1.4",
3+
"version": "0.1.7",
44
"description": "Codemod Javascript Sandbox",
55
"main": "./dist/js/index.js",
66
"exports": {

crates/codemod-sandbox/src/ast_grep/wasm_lang.rs

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,17 @@ impl Content for Wrapper {
279279
}
280280
}
281281

282+
/// Converts an offset reported by the WASM tree-sitter binding into a byte
/// offset into `text`.
///
/// `char_offset` is counted in UTF-16 code units, so a character outside the
/// Basic Multilingual Plane (for example an emoji) consumes two units. For
/// text that contains only BMP characters this is the same as counting
/// characters.
///
/// NOTE(review): this assumes the binding reports indices as JavaScript
/// string positions (UTF-16 code units), as web-tree-sitter does; confirm
/// against the binding in use.
///
/// Returns `text.len()` when `char_offset` is past the end of `text`. An
/// offset that falls inside a surrogate pair is rounded up to the end of that
/// character, so the result is always a valid UTF-8 boundary.
fn char_offset_to_byte_offset(text: &str, char_offset: usize) -> usize {
    let mut utf16_units = 0usize;
    for (byte_idx, c) in text.char_indices() {
        // `byte_idx` is where `c` starts; stop once enough units were consumed.
        if utf16_units >= char_offset {
            return byte_idx;
        }
        utf16_units += c.len_utf16();
    }
    text.len()
}
285+
282286
fn pos_for_byte_offset(input: &[u8], offset: usize) -> Point {
283287
debug_assert!(offset <= input.len());
284288
let (mut row, mut col) = (0, 0);
285-
for &b in input.iter().take(offset) {
286-
if b == b'\n' {
289+
let input_str = std::str::from_utf8(&input[..offset]).unwrap_or("");
290+
291+
for c in input_str.chars() {
292+
if c == '\n' {
287293
row += 1;
288294
col = 0;
289295
} else {
@@ -407,21 +413,33 @@ impl<'a> SgNode<'a> for Node {
407413
self.0.id() as usize
408414
}
409415
fn range(&self) -> std::ops::Range<usize> {
410-
(self.0.start_index() as usize)..(self.0.end_index() as usize)
416+
// WASM tree-sitter returns character indices, convert to byte indices
417+
let start_char_idx = self.0.start_index() as usize;
418+
let end_char_idx = self.0.end_index() as usize;
419+
420+
let root_text = self.get_root_text();
421+
let start_byte = char_offset_to_byte_offset(&root_text, start_char_idx);
422+
let end_byte = char_offset_to_byte_offset(&root_text, end_char_idx);
423+
424+
start_byte..end_byte
411425
}
412426
fn start_pos(&self) -> Position {
413427
let start = self.0.start_position();
414-
let offset = self.0.start_index();
415-
Position::new(
416-
start.row() as usize,
417-
start.column() as usize,
418-
offset as usize,
419-
)
428+
let char_offset = self.0.start_index() as usize;
429+
430+
let root_text = self.get_root_text();
431+
let byte_offset = char_offset_to_byte_offset(&root_text, char_offset);
432+
433+
Position::new(start.row() as usize, start.column() as usize, byte_offset)
420434
}
421435
fn end_pos(&self) -> Position {
422436
let end = self.0.end_position();
423-
let offset = self.0.end_index();
424-
Position::new(end.row() as usize, end.column() as usize, offset as usize)
437+
let char_offset = self.0.end_index() as usize;
438+
439+
let root_text = self.get_root_text();
440+
let byte_offset = char_offset_to_byte_offset(&root_text, char_offset);
441+
442+
Position::new(end.row() as usize, end.column() as usize, byte_offset)
425443
}
426444
// missing node is a tree-sitter specific concept
427445
fn is_missing(&self) -> bool {
@@ -458,6 +476,19 @@ impl<'a> SgNode<'a> for Node {
458476
}
459477
}
460478

479+
// HACK: this is a horrible idea, but it does the job for WASM for now.
// Every call climbs to the root node and converts its text into an owned String.
480+
// We don't need WASM to be perfect.
481+
// TODO: that said, we should fix this in the future.
482+
impl Node {
483+
/// Returns the text of the topmost ancestor of this node as an owned `String`.
///
/// Follows `parent()` links until a node without a parent is reached, then
/// converts that node's `text()` into a `String`.
fn get_root_text(&self) -> String {
484+
// Start from an owned copy of the inner handle, because `current` is reassigned below.
let mut current = self.0.clone();
485+
// Climb one level per iteration; the loop ends at the node whose `parent()` is `None`.
while let Some(parent) = current.parent() {
486+
current = parent;
487+
}
488+
current.text().into()
489+
}
490+
}
491+
461492
impl Doc for WasmDoc {
462493
type Lang = WasmLang;
463494
type Source = Wrapper;

0 commit comments

Comments
 (0)