From d5054808bce2872f24f5d51e8dbb1b5f86b61b47 Mon Sep 17 00:00:00 2001 From: Ella <4710635+ellatrix@users.noreply.github.com> Date: Tue, 10 Oct 2023 08:08:05 +0300 Subject: [PATCH] Paste: fix MS Word list paste (#55127) * Paste: fix MS Word list paste * Match mso-list:Ignore * Fix inline paste --- .../src/api/raw-handling/ms-list-converter.js | 31 +++++++------------ .../src/api/raw-handling/ms-list-ignore.js | 27 ++++++++++++++++ .../src/api/raw-handling/paste-handler.js | 2 ++ .../raw-handling/test/ms-list-converter.js | 18 +++++------ .../blocks-raw-handling.test.js.snap | 4 ++- test/integration/blocks-raw-handling.test.js | 1 + .../fixtures/documents/ms-word-list-in.html | 28 +++++++++++++++++ .../fixtures/documents/ms-word-list-out.html | 25 +++++++++++++++ 8 files changed, 106 insertions(+), 30 deletions(-) create mode 100644 packages/blocks/src/api/raw-handling/ms-list-ignore.js create mode 100644 test/integration/fixtures/documents/ms-word-list-in.html create mode 100644 test/integration/fixtures/documents/ms-word-list-out.html diff --git a/packages/blocks/src/api/raw-handling/ms-list-converter.js b/packages/blocks/src/api/raw-handling/ms-list-converter.js index 8b45a5ab53fdb..fdbc48398a1cc 100644 --- a/packages/blocks/src/api/raw-handling/ms-list-converter.js +++ b/packages/blocks/src/api/raw-handling/ms-list-converter.js @@ -3,6 +3,12 @@ */ const { parseInt } = window; +/** + * Internal dependencies + */ +import { deepFilterHTML } from './utils'; +import msListIgnore from './ms-list-ignore'; + function isList( node ) { return node.nodeName === 'OL' || node.nodeName === 'UL'; } @@ -14,23 +20,10 @@ export default function msListConverter( node, doc ) { const style = node.getAttribute( 'style' ); - if ( ! style ) { - return; - } - - // Quick check. - if ( style.indexOf( 'mso-list' ) === -1 ) { - return; - } - - const matches = /mso-list\s*:[^;]+level([0-9]+)/i.exec( style ); - - if ( ! matches ) { + if ( ! style || ! 
style.includes( 'mso-list' ) ) { return; } - let level = parseInt( matches[ 1 ], 10 ) - 1 || 0; - const prevNode = node.previousElementSibling; // Add new list if no previous. @@ -53,13 +46,11 @@ export default function msListConverter( node, doc ) { let receivingNode = listNode; - // Remove the first span with list info. - node.removeChild( node.firstChild ); - // Add content. - while ( node.firstChild ) { - listItem.appendChild( node.firstChild ); - } + listItem.innerHTML = deepFilterHTML( node.innerHTML, [ msListIgnore ] ); + + const matches = /mso-list\s*:[^;]+level([0-9]+)/i.exec( style ); + let level = matches ? parseInt( matches[ 1 ], 10 ) - 1 || 0 : 0; // Change pointer depending on indentation level. while ( level-- ) { diff --git a/packages/blocks/src/api/raw-handling/ms-list-ignore.js b/packages/blocks/src/api/raw-handling/ms-list-ignore.js new file mode 100644 index 0000000000000..d1ed421e3b76c --- /dev/null +++ b/packages/blocks/src/api/raw-handling/ms-list-ignore.js @@ -0,0 +1,27 @@ +/** + * Removes elements whose inline style sets `mso-list` to `Ignore`. + * + * @param {Node} node The node to be processed. + * @return {void} + */ +export default function msListIgnore( node ) { + if ( node.nodeType !== node.ELEMENT_NODE ) { + return; + } + + const style = node.getAttribute( 'style' ); + + if ( ! style || ! 
style.includes( 'mso-list' ) ) { + return; + } + + const rules = style.split( ';' ).reduce( ( acc, rule ) => { + const [ key, value ] = rule.split( ':' ); + acc[ key.trim().toLowerCase() ] = value.trim().toLowerCase(); + return acc; + }, {} ); + + if ( rules[ 'mso-list' ] === 'ignore' ) { + node.remove(); + } +} diff --git a/packages/blocks/src/api/raw-handling/paste-handler.js b/packages/blocks/src/api/raw-handling/paste-handler.js index c4ad40e0b1f50..849ce681959cd 100644 --- a/packages/blocks/src/api/raw-handling/paste-handler.js +++ b/packages/blocks/src/api/raw-handling/paste-handler.js @@ -17,6 +17,7 @@ import isInlineContent from './is-inline-content'; import phrasingContentReducer from './phrasing-content-reducer'; import headRemover from './head-remover'; import msListConverter from './ms-list-converter'; +import msListIgnore from './ms-list-ignore'; import listReducer from './list-reducer'; import imageCorrector from './image-corrector'; import blockquoteNormaliser from './blockquote-normaliser'; @@ -49,6 +50,7 @@ function filterInlineHTML( HTML, preserveWhiteSpace ) { HTML = deepFilterHTML( HTML, [ headRemover, googleDocsUIDRemover, + msListIgnore, phrasingContentReducer, commentRemover, ] ); diff --git a/packages/blocks/src/api/raw-handling/test/ms-list-converter.js b/packages/blocks/src/api/raw-handling/test/ms-list-converter.js index a7c58dfa03010..5ae7da68f16a9 100644 --- a/packages/blocks/src/api/raw-handling/test/ms-list-converter.js +++ b/packages/blocks/src/api/raw-handling/test/ms-list-converter.js @@ -7,7 +7,7 @@ import { deepFilterHTML } from '../utils'; describe( 'msListConverter', () => { it( 'should convert unordered list', () => { const input = - '

* test

'; + '

* test

'; const output = ''; expect( deepFilterHTML( input, [ msListConverter ] ) ).toEqual( output @@ -16,7 +16,7 @@ describe( 'msListConverter', () => { it( 'should convert ordered list', () => { const input = - '

1 test

'; + '

1 test

'; const output = '
  1. test
'; expect( deepFilterHTML( input, [ msListConverter ] ) ).toEqual( output @@ -25,11 +25,11 @@ describe( 'msListConverter', () => { it( 'should convert indented list', () => { const input1 = - '

* test

'; + '

* test

'; const input2 = - '

* test

'; + '

* test

'; const input3 = - '

* test

'; + '

* test

'; const output = ''; expect( @@ -39,13 +39,13 @@ describe( 'msListConverter', () => { it( 'should convert deep indented list', () => { const input1 = - '

* test

'; + '

* test

'; const input2 = - '

* test

'; + '

* test

'; const input3 = - '

* test

'; + '

* test

'; const input4 = - '

* test

'; + '

* test

'; const output = ''; expect( diff --git a/test/integration/__snapshots__/blocks-raw-handling.test.js.snap b/test/integration/__snapshots__/blocks-raw-handling.test.js.snap index 178a57b875553..6a39ca9ed7d65 100644 --- a/test/integration/__snapshots__/blocks-raw-handling.test.js.snap +++ b/test/integration/__snapshots__/blocks-raw-handling.test.js.snap @@ -26,7 +26,9 @@ exports[`Blocks raw handling pasteHandler iframe-embed 1`] = `""`; exports[`Blocks raw handling pasteHandler markdown 1`] = `"This is a heading with italic
This is a paragraph with a link, bold, and strikethrough.
Preserve
line breaks please.
Lists
A
Bulleted Indented
List
One
Two
Three
Table
First Header
Second Header
Content from cell 1
Content from cell 2
Content in the first column
Content in the second column



Table with empty cells.
Quote
First
Second
Code
Inline code tags should work.
This is a code block."`; -exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title
 
This is a subtitle
 
This is a heading level 1
 
This is a heading level 2
 
This is a paragraph with a link.
 
·      A
·      Bulleted
o   Indented
·      List
 
1      One
2      Two
3      Three
 
One
Two
Three
1
2
3
I
II
III
 
An image:
 

This is an anchor link that leads to the next paragraph.
This is the paragraph with the anchor.
This is an anchor link that leads nowhere.
This is a paragraph with an anchor with no link pointing to it.
This is a reference to a footnote[1].
This is a reference to an endnote[i].


[1] This is a footnote.


[i] This is an endnote."`; +exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title
 
This is a subtitle
 
This is a heading level 1
 
This is a heading level 2
 
This is a paragraph with a link.
 
A
Bulleted
Indented
List
 
One
Two
Three
 
One
Two
Three
1
2
3
I
II
III
 
An image:
 

This is an anchor link that leads to the next paragraph.
This is the paragraph with the anchor.
This is an anchor link that leads nowhere.
This is a paragraph with an anchor with no link pointing to it.
This is a reference to a footnote[1].
This is a reference to an endnote[i].


[1] This is a footnote.


[i] This is an endnote."`; + +exports[`Blocks raw handling pasteHandler ms-word-list 1`] = `"This is a headline?
This is a text:
One
Two
Three
Lorem Ipsum.
 "`; exports[`Blocks raw handling pasteHandler ms-word-online 1`] = `"This is a heading 
This is a paragraph with a link

Bulleted 
Indented 
List 
 
One 
Two 
Three 

One 
Two 
Three 




II 
III 
 
An image: 
 "`; diff --git a/test/integration/blocks-raw-handling.test.js b/test/integration/blocks-raw-handling.test.js index 2a31d0b0ceaa2..229fa0ba7761c 100644 --- a/test/integration/blocks-raw-handling.test.js +++ b/test/integration/blocks-raw-handling.test.js @@ -383,6 +383,7 @@ describe( 'Blocks raw handling', () => { 'google-docs-table-with-comments', 'google-docs-with-comments', 'ms-word', + 'ms-word-list', 'ms-word-styled', 'ms-word-online', 'evernote', diff --git a/test/integration/fixtures/documents/ms-word-list-in.html b/test/integration/fixtures/documents/ms-word-list-in.html new file mode 100644 index 0000000000000..8cf79b8f7e5db --- /dev/null +++ b/test/integration/fixtures/documents/ms-word-list-in.html @@ -0,0 +1,28 @@ +

This is a headline?

+ +

This is a text:

+ +

·       +One

+ +

·       +Two

+ +

·       +Three

+ + + +

Lorem Ipsum.

+ +

 

\ No newline at end of file diff --git a/test/integration/fixtures/documents/ms-word-list-out.html b/test/integration/fixtures/documents/ms-word-list-out.html new file mode 100644 index 0000000000000..f57946f64bc98 --- /dev/null +++ b/test/integration/fixtures/documents/ms-word-list-out.html @@ -0,0 +1,25 @@ + +

This is a headline?

+ + + +

This is a text:

+ + + + + + + +

Lorem Ipsum.

+ \ No newline at end of file