@@ -39,21 +39,28 @@ type Replacement struct {
3939}
4040
4141type EscapedDataDetails struct {
42- ContentStartIndex int
43- ContentEndIndex int
44- NextPartIndex int
45- CurrentPartIndex int
46- OriginalByteSize int
42+ ContentStartIndex int
43+ ContentEndIndex int
44+ NextPartIndex int
45+ CurrentPartIndex int
46+ OriginalByteSize int
47+ SerializedPartRange SerializedPartRange
4748}
4849
49- type SerializedContentRange struct {
50+ type SerializedPartRange struct {
5051 From int
5152 To int
5253}
5354
55+ type SerializedReplaceResult struct {
56+ Pre []byte
57+ SerializedPortion []byte
58+ Post []byte
59+ }
60+
5461type SerializedContentReplacement struct {
5562 FixedContent []byte
56- SerializedContentRange []SerializedContentRange
63+ SerializedContentRange []SerializedPartRange
5764}
5865
5966type LinePartWithType struct {
@@ -161,201 +168,87 @@ func Debugf(format string, args ...interface{}) {
161168}
162169
163170func fixLine (line * []byte , replacements []* Replacement ) * []byte {
164- var fixedSerializedContent * SerializedContentReplacement = nil
165-
166- if bytes .Contains (* line , []byte ("s:" )) {
167- fixedSerializedContent = fixSerializedContent (line , replacements )
168- line = & fixedSerializedContent .FixedContent
169- }
170171
171172 Debugf ("Doing global replacements: %s\n " , string (* line ))
172173
173- var linePartsWithType []LinePartWithType
174-
175- if fixedSerializedContent != nil {
176- index := 0
177-
178- for _ , serializedContentRange := range fixedSerializedContent .SerializedContentRange {
179- linePartsWithType = append (linePartsWithType , LinePartWithType {
180- Content : (* line )[index :serializedContentRange .From ],
181- PhpSerialized : false ,
182- }, LinePartWithType {
183- Content : (* line )[serializedContentRange .From : serializedContentRange .To + 1 ],
184- PhpSerialized : true ,
185- })
186-
187- index = serializedContentRange .To + 1
188- }
189-
190- lastIndex := len (* line ) - 1
191-
192- if index <= lastIndex {
193- linePartsWithType = append (linePartsWithType , LinePartWithType {
194- Content : (* line )[index : lastIndex + 1 ],
195- PhpSerialized : false ,
196- })
197- }
198- } else {
199- linePartsWithType = []LinePartWithType {
200- {
201- Content : * line ,
202- PhpSerialized : false ,
203- },
204- }
205- }
206-
207- // Catch anything left
208- for _ , replacement := range replacements {
209- for index , linePartWithType := range linePartsWithType {
210- if linePartWithType .PhpSerialized == false {
211- linePartsWithType [index ].Content = bytes .ReplaceAll (linePartWithType .Content , replacement .From , replacement .To )
212-
213- Debugf ("After replacing unserialized part (from: %s | to: %s): %s\n " , replacement .From , replacement .To , string (linePartsWithType [index ].Content ))
214- }
215- }
216- }
217-
218- rebuiltLine := bytes .Join (func () [][]byte {
219- parts := make ([][]byte , len (linePartsWithType ))
220- for i , part := range linePartsWithType {
221- parts [i ] = part .Content
222- }
223- return parts
224- }(), nil )
225-
226- * line = rebuiltLine
227-
228- Debugf ("All done: %s\n " , string (* line ))
229-
230- return line
231- }
232-
233- func fixSerializedContent (line * []byte , replacements []* Replacement ) * SerializedContentReplacement {
234- index := 0
174+ linePart := * line
235175
236176 var rebuiltLine []byte
237177
238- var serializedContentRange []SerializedContentRange
239-
240- var result SerializedContentReplacement
241-
242- for index < len (* line ) {
243- Debugf ("Start of loop, index: %d\n " , index )
244- linePart := (* line )[index :]
245-
246- details , err := parseEscapedData (linePart )
247-
178+ for len (linePart ) > 0 {
179+ result , err := fixLineWithSerializedData (linePart , replacements )
248180 if err != nil {
249- // if right from the beginning, we couldn't find any string prefix,
250- if err .Error () == "could not find serialized string prefix" && index == 0 {
251- result = SerializedContentReplacement {
252- SerializedContentRange : serializedContentRange ,
253- FixedContent : * line ,
254- }
255- return & result
256- }
257-
258- // we've run out of things to parse, so just break out and append the rest
181+ Debugf ("Error when trying to fix line : %s\n " , err .Error ())
259182 rebuiltLine = append (rebuiltLine , linePart ... )
260183 break
261184 }
262-
263- // append all the string right before the part we found the next serialized string
264- rebuiltLine = append (rebuiltLine , (* line )[index :index + details .CurrentPartIndex ]... )
265-
266- content := linePart [details .ContentStartIndex : details .ContentEndIndex + 1 ]
267-
268- updatedContent := replaceInSerializedBytes (content , replacements )
269-
270- // php needs the unescaped length, so let's unescape it and measure the length
271- contentLength := len (unescapeContent (updatedContent ))
272-
273- // but if the content never changed, and there's an error in the original byte size, we'll let the error be for safety.
274- if bytes .Equal (content , updatedContent ) {
275- contentLength = details .OriginalByteSize
276- }
277-
278- // and we rebuild the string
279- rebuilt := "s:" + strconv .Itoa (contentLength ) + ":\\ \" " + string (updatedContent ) + "\\ \" ;"
280-
281- rebuiltLine = append (rebuiltLine , []byte (rebuilt )... )
282- serializedContentRange = append (serializedContentRange , SerializedContentRange {
283- From : index + details .CurrentPartIndex ,
284- To : index + len (rebuilt ) - 1 ,
285- })
286-
287- index = index + details .NextPartIndex
185+ rebuiltLine = append (rebuiltLine , result .Pre ... )
186+ rebuiltLine = append (rebuiltLine , result .SerializedPortion ... )
187+ linePart = result .Post
288188 }
289189
290- result = SerializedContentReplacement {
291- SerializedContentRange : serializedContentRange ,
292- FixedContent : rebuiltLine ,
293- }
190+ * line = rebuiltLine
294191
295- return & result
192+ Debugf ("All done: %s\n " , string (* line ))
193+
194+ return line
296195}
297196
298- func replaceInSerializedBytes ( serialized []byte , replacements []* Replacement ) []byte {
197+ func replaceByPart ( part []byte , replacements []* Replacement ) []byte {
299198 for _ , replacement := range replacements {
300- serialized = bytes .ReplaceAll (serialized , replacement .From , replacement .To )
199+ part = bytes .ReplaceAll (part , replacement .From , replacement .To )
301200 }
302- return serialized
201+ return part
303202}
304203
305204var serializedStringPrefixRegexp = regexp .MustCompile (`s:(\d+):\\"` )
306205
307- // Parses escaped data, returning the location details for further parsing
308- func parseEscapedData (linePart []byte ) (* EscapedDataDetails , error ) {
309-
310- details := EscapedDataDetails {
311- ContentStartIndex : 0 ,
312- ContentEndIndex : 0 ,
313- NextPartIndex : 0 ,
314- CurrentPartIndex : 0 ,
315- OriginalByteSize : 0 ,
316- }
206+ func fixLineWithSerializedData (linePart []byte , replacements []* Replacement ) (* SerializedReplaceResult , error ) {
317207
318208 // find starting point in the line
319209 //TODO: We should first check if we found the string when inside a quote or not.
320210 // but currently skipping that scenario because it seems unlikely to find it outside.
321211 match := serializedStringPrefixRegexp .FindSubmatchIndex (linePart )
322212 if match == nil {
323- return nil , fmt .Errorf ("could not find serialized string prefix" )
213+ return & SerializedReplaceResult {
214+ Pre : replaceByPart (linePart , replacements ),
215+ SerializedPortion : []byte {},
216+ Post : []byte {},
217+ }, nil
324218 }
325219
326- matchedAt := match [0 ]
327- originalBytes := linePart [match [2 ]:match [3 ]]
220+ pre := append ([]byte {}, linePart [:match [0 ]]... )
328221
329- details . OriginalByteSize , _ = strconv . Atoi ( string ( originalBytes ) )
222+ pre = replaceByPart ( pre , replacements )
330223
331- details .CurrentPartIndex = matchedAt
224+ if pre == nil {
225+ pre = []byte {}
226+ }
227+
228+ originalBytes := linePart [match [2 ]:match [3 ]]
229+
230+ originalByteSize , _ := strconv .Atoi (string (originalBytes ))
332231
333232 // the following assumes escaped double quotes
334233 //TODO: MySQL can optionally not escape the double quote,
335234 // but generally sqldumps always include the quotes.
336- initialContentIndex := match [3 ] + 3
235+ contentStartIndex := match [3 ] + 3
337236
338- details .ContentStartIndex = initialContentIndex
339-
340- currentContentIndex := initialContentIndex
237+ currentContentIndex := contentStartIndex
341238
342239 contentByteCount := 0
343240
344- var nextPartIndex int
241+ contentEndIndex := 0
242+
243+ var nextSliceIndex int
345244
346245 backslash := byte ('\\' )
347246 semicolon := byte (';' )
348247 quote := byte ('"' )
349- nextPartFound := false
350-
351- secondMatch := serializedStringPrefixRegexp .FindSubmatchIndex (linePart [matchedAt + 1 :])
248+ nextSliceFound := false
352249
353250 maxIndex := len (linePart ) - 1
354251
355- if secondMatch != nil {
356- maxIndex = secondMatch [0 ] + matchedAt
357- }
358-
359252 // let's find where the content actually ends.
360253 // it should end when the unescaped value is `";`
361254 for currentContentIndex < len (linePart ) {
@@ -368,7 +261,7 @@ func parseEscapedData(linePart []byte) (*EscapedDataDetails, error) {
368261 char := linePart [currentContentIndex ]
369262 secondChar := linePart [currentContentIndex + 1 ]
370263 thirdChar := linePart [currentContentIndex + 2 ]
371- if char == backslash && contentByteCount < details . OriginalByteSize {
264+ if char == backslash && contentByteCount < originalByteSize {
372265 unescapedBytePair := getUnescapedBytesIfEscaped (linePart [currentContentIndex : currentContentIndex + 2 ])
373266 // if we get the byte pair without the backslash, it corresponds to a byte
374267 contentByteCount += len (unescapedBytePair )
@@ -378,26 +271,46 @@ func parseEscapedData(linePart []byte) (*EscapedDataDetails, error) {
378271 continue
379272 }
380273
381- if char == backslash && secondChar == quote && thirdChar == semicolon && contentByteCount >= details .OriginalByteSize {
274+ if char == backslash && secondChar == quote && thirdChar == semicolon && contentByteCount >= originalByteSize {
275+
276+ // we're at backslash
382277
383- // since we've filtered out all the escaped value already, this should be the actual end
384- nextPartIndex = currentContentIndex + 3
385- details .NextPartIndex = nextPartIndex
278+ // index of the beginning of the next slice
279+ nextSliceIndex = currentContentIndex + 3
386280 // we're at backslash, so we need to minus 1 to get the index where the content finishes
387- details . ContentEndIndex = currentContentIndex - 1
388- nextPartFound = true
281+ contentEndIndex = currentContentIndex - 1
282+ nextSliceFound = true
389283 break
390284 }
391285
286+ if contentByteCount > originalByteSize {
287+ return nil , fmt .Errorf ("faulty data, byte count does not match data size" )
288+ }
289+
392290 contentByteCount ++
393291 currentContentIndex ++
394292 }
395293
396- if nextPartFound == false {
294+ content := append ([]byte {}, linePart [contentStartIndex :contentEndIndex + 1 ]... )
295+
296+ content = replaceByPart (content , replacements )
297+
298+ contentLength := len (unescapeContent (content ))
299+
300+ // and we rebuild the string
301+ rebuiltSerializedString := "s:" + strconv .Itoa (contentLength ) + ":\\ \" " + string (content ) + "\\ \" ;"
302+
303+ if nextSliceFound == false {
397304 return nil , fmt .Errorf ("end of serialized string not found" )
398305 }
399306
400- return & details , nil
307+ result := SerializedReplaceResult {
308+ Pre : pre ,
309+ SerializedPortion : []byte (rebuiltSerializedString ),
310+ Post : linePart [nextSliceIndex :],
311+ }
312+
313+ return & result , nil
401314}
402315
403316func getUnescapedBytesIfEscaped (charPair []byte ) []byte {
0 commit comments