Skip to content

Commit 961ac02

Browse files
committed
parser: Fix another complex log parsing example
1 parent 34a9a6f commit 961ac02

File tree

2 files changed

+89
-3
lines changed

2 files changed

+89
-3
lines changed

utils/ByteStringParser.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,31 @@ describe('ByteStringParser', () => {
183183
}
184184
expect(foundIndex).toBe(expectedBytes.length);
185185
});
186+
187+
it('should parse continuous hex from messy logs (no spaces)', () => {
188+
const input = `gingin-chasm-1 | [00] 2025-10-16T10:50:50.203Z INFO in/udp.go:296 Received NATS message to send to device 10579203: 033a00d00700000100012dd300244090270e4750504e554c4c414e54454e4e4100000d5452494d424c4520414c4c4f59000053881f0000000048`;
189+
const result = parseByteString(input);
190+
expect(result.hasHex).toBe(true);
191+
// The hex string should be parsed
192+
expect(result.bytes.length).toBeGreaterThan(0);
193+
// Check that we got the expected bytes at the start
194+
expect(result.bytes[0]).toBe(0x03);
195+
expect(result.bytes[1]).toBe(0x3a);
196+
expect(result.bytes[2]).toBe(0x00);
197+
expect(result.bytes[3]).toBe(0xd0);
198+
});
199+
200+
it('should parse multiple continuous hex sequences from logs', () => {
201+
const input = `gingin-chasm-1 | [00] 2025-10-16T10:50:50.203Z INFO in/udp.go:296 Received NATS message to send to device 10579203: 033a00d00700000100012dd300244090270e4750504e554c4c414e54454e4e4100000d5452494d424c4520414c4c4f59000053881f0000000048
202+
gingin-chasm-1 | [00] 2025-10-16T10:50:50.223Z INFO in/udp.go:296 Received NATS message to send to device 10579203: 03f800d0070000010001ebd300e24320275bb64582002085560c020000000020204080777fe7e7fa2a427a72428a1a1a23ead8a5490b42dcfef00eca4e5caa5157b3cc680e502484b5100c200c3e30707132827104fb166430da6cdcf3d9e9a3de82f006a3ed4dde3fbcf77a267f0d44ba95b4c0847efe1644b0591281e95008b7fc22e018085ea01692005a480065080a0360385080e1460394a855ff817c9a83d0d01ce508739541b17a0137fe0c4917ae03bee7a37b9e8bedf08fbe22fee9497ba51fecf847fffffffffffffffffffffffffffff800000017db75f65b44545d53755f73f3ad4db84f753af18e37ec434e120000007fed`;
203+
const result = parseByteString(input);
204+
expect(result.hasHex).toBe(true);
205+
// Both messages should be concatenated
206+
expect(result.bytes.length).toBeGreaterThan(100);
207+
// Check start of first message
208+
expect(result.bytes[0]).toBe(0x03);
209+
expect(result.bytes[1]).toBe(0x3a);
210+
});
186211
});
187212

188213
describe('mixed and edge cases', () => {

utils/ByteStringParser.ts

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,17 @@ function parseSingleByteString(input: string): ParsedBytes {
7474
const allHexPattern = /^[0-9a-fA-F]+$/;
7575

7676
if (allHexPattern.test(onlyValid) && /[a-fA-F]/.test(onlyValid) && onlyValid.length >= 4 && onlyValid.length % 2 === 0) {
77-
// Calculate how much of the original input is hex chars
77+
// Check if there's a continuous hex sequence in the ORIGINAL input that's long enough (10+ bytes = 20+ chars)
78+
// This handles cases where hex is embedded in messy logs
79+
const continuousHexPattern = /[0-9a-fA-F]{20,}/g;
80+
const continuousSequences = normalized.match(continuousHexPattern) || [];
81+
82+
if (continuousSequences.length > 0) {
83+
// Found a long continuous hex sequence, likely hex mode
84+
return parseHexBytes(normalized);
85+
}
86+
87+
// Fallback: calculate how much of the original input is hex chars
7888
const hexDensity = onlyValid.length / normalized.replace(/\s/g, '').length;
7989
// If > 70% of non-whitespace chars are valid hex, likely continuous hex
8090
if (hexDensity > 0.7) {
@@ -94,7 +104,58 @@ function parseSingleByteString(input: string): ParsedBytes {
94104
function parseHexBytes(input: string): ParsedBytes {
95105
const bytes: number[] = [];
96106

97-
// First, try to find 0x-prefixed hex values
107+
// First, check for continuous hex sequences (runs of 4+ hex characters)
108+
// These indicate actual hex messages embedded in logs
109+
// Do this EARLY before other patterns to prioritize them
110+
const continuousHexPattern = /[0-9a-fA-F]{4,}/g;
111+
const sequences = input.match(continuousHexPattern) || [];
112+
113+
// If we found significant continuous sequences, extract all 4+ character sequences
114+
// This handles cases where multiple messages are in the same input
115+
if (sequences.length > 0) {
116+
// Concatenate ALL sequences (even short ones like "2025") to handle multi-message logs
117+
// No sorting is needed: match() returns the sequences in their original input order
118+
const hexOnly = sequences.join('');
119+
120+
// However, if any sequence is long (>= 20 chars), use only those long sequences
121+
const significantSequences = sequences.filter(s => s.length >= 20);
122+
if (significantSequences.length > 0) {
123+
// Use only the significant sequences
124+
const significantHex = significantSequences.join('');
125+
126+
// Parse as pairs of hex digits
127+
for (let i = 0; i < significantHex.length; i += 2) {
128+
const byteStr = significantHex.substring(i, i + 2);
129+
if (byteStr.length === 2) {
130+
const value = parseInt(byteStr, 16);
131+
if (!isNaN(value)) {
132+
bytes.push(value);
133+
}
134+
}
135+
}
136+
137+
if (bytes.length > 0) {
138+
return { bytes, hasHex: true };
139+
}
140+
} else {
141+
// Use all sequences if none are particularly long
142+
for (let i = 0; i < hexOnly.length; i += 2) {
143+
const byteStr = hexOnly.substring(i, i + 2);
144+
if (byteStr.length === 2) {
145+
const value = parseInt(byteStr, 16);
146+
if (!isNaN(value)) {
147+
bytes.push(value);
148+
}
149+
}
150+
}
151+
152+
if (bytes.length > 0) {
153+
return { bytes, hasHex: true };
154+
}
155+
}
156+
}
157+
158+
// Try to find 0x-prefixed hex values
98159
// Match 0x followed by exactly 1 or 2 hex digits
99160
// We'll match greedily and process all of them
100161
const hex0xPattern = /0x([0-9a-fA-F]{1,2})/gi;
@@ -153,7 +214,7 @@ function parseHexBytes(input: string): ParsedBytes {
153214
return { bytes, hasHex: true };
154215
}
155216

156-
// Last resort: extract all valid hex characters as continuous hex
217+
// Final fallback: extract all valid hex characters as continuous hex
157218
// This handles cases like "ffff" without spaces
158219
const hexOnly = input.replace(/[^0-9a-fA-F]/g, '');
159220

0 commit comments

Comments
 (0)