Skip to content

Commit

Permalink
Paste: fix MS Word list paste (#55127)
Browse files Browse the repository at this point in the history
* Paste: fix MS Word list paste

* Match mso-list:Ignore

* Fix inline paste
  • Loading branch information
ellatrix authored and mikachan committed Oct 10, 2023
1 parent 80e0020 commit d505480
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 30 deletions.
31 changes: 11 additions & 20 deletions packages/blocks/src/api/raw-handling/ms-list-converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
*/
const { parseInt } = window;

/**
* Internal dependencies
*/
import { deepFilterHTML } from './utils';
import msListIgnore from './ms-list-ignore';

/**
 * Checks whether a node is an ordered or unordered list element.
 *
 * @param {Node} node The node to inspect.
 * @return {boolean} True when the node's name is `OL` or `UL`.
 */
function isList( node ) {
	const listNodeNames = [ 'OL', 'UL' ];
	return listNodeNames.includes( node.nodeName );
}
Expand All @@ -14,23 +20,10 @@ export default function msListConverter( node, doc ) {

const style = node.getAttribute( 'style' );

if ( ! style ) {
return;
}

// Quick check.
if ( style.indexOf( 'mso-list' ) === -1 ) {
return;
}

const matches = /mso-list\s*:[^;]+level([0-9]+)/i.exec( style );

if ( ! matches ) {
if ( ! style || ! style.includes( 'mso-list' ) ) {
return;
}

let level = parseInt( matches[ 1 ], 10 ) - 1 || 0;

const prevNode = node.previousElementSibling;

// Add new list if no previous.
Expand All @@ -53,13 +46,11 @@ export default function msListConverter( node, doc ) {

let receivingNode = listNode;

// Remove the first span with list info.
node.removeChild( node.firstChild );

// Add content.
while ( node.firstChild ) {
listItem.appendChild( node.firstChild );
}
listItem.innerHTML = deepFilterHTML( node.innerHTML, [ msListIgnore ] );

const matches = /mso-list\s*:[^;]+level([0-9]+)/i.exec( style );
let level = matches ? parseInt( matches[ 1 ], 10 ) - 1 || 0 : 0;

// Change pointer depending on indentation level.
while ( level-- ) {
Expand Down
27 changes: 27 additions & 0 deletions packages/blocks/src/api/raw-handling/ms-list-ignore.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* Looks for comments, and removes them.
*
* @param {Node} node The node to be processed.
* @return {void}
*/
export default function msListIgnore( node ) {
if ( node.nodeType !== node.ELEMENT_NODE ) {
return;
}

const style = node.getAttribute( 'style' );

if ( ! style || ! style.includes( 'mso-list' ) ) {
return;
}

const rules = style.split( ';' ).reduce( ( acc, rule ) => {
const [ key, value ] = rule.split( ':' );
acc[ key.trim().toLowerCase() ] = value.trim().toLowerCase();
return acc;
}, {} );

if ( rules[ 'mso-list' ] === 'ignore' ) {
node.remove();
}
}
2 changes: 2 additions & 0 deletions packages/blocks/src/api/raw-handling/paste-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import isInlineContent from './is-inline-content';
import phrasingContentReducer from './phrasing-content-reducer';
import headRemover from './head-remover';
import msListConverter from './ms-list-converter';
import msListIgnore from './ms-list-ignore';
import listReducer from './list-reducer';
import imageCorrector from './image-corrector';
import blockquoteNormaliser from './blockquote-normaliser';
Expand Down Expand Up @@ -49,6 +50,7 @@ function filterInlineHTML( HTML, preserveWhiteSpace ) {
HTML = deepFilterHTML( HTML, [
headRemover,
googleDocsUIDRemover,
msListIgnore,
phrasingContentReducer,
commentRemover,
] );
Expand Down
18 changes: 9 additions & 9 deletions packages/blocks/src/api/raw-handling/test/ms-list-converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { deepFilterHTML } from '../utils';
describe( 'msListConverter', () => {
it( 'should convert unordered list', () => {
const input =
'<p style="mso-list:l0 level1 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const output = '<ul><li>test</li></ul>';
expect( deepFilterHTML( input, [ msListConverter ] ) ).toEqual(
output
Expand All @@ -16,7 +16,7 @@ describe( 'msListConverter', () => {

it( 'should convert ordered list', () => {
const input =
'<p style="mso-list:l0 level1 lfo1"><span>1 </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">1 </span>test</p>';
const output = '<ol type="1"><li>test</li></ol>';
expect( deepFilterHTML( input, [ msListConverter ] ) ).toEqual(
output
Expand All @@ -25,11 +25,11 @@ describe( 'msListConverter', () => {

it( 'should convert indented list', () => {
const input1 =
'<p style="mso-list:l0 level1 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const input2 =
'<p style="mso-list:l0 level2 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level2 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const input3 =
'<p style="mso-list:l0 level1 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const output =
'<ul><li>test<ul><li>test</li></ul></li><li>test</li></ul>';
expect(
Expand All @@ -39,13 +39,13 @@ describe( 'msListConverter', () => {

it( 'should convert deep indented list', () => {
const input1 =
'<p style="mso-list:l0 level1 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const input2 =
'<p style="mso-list:l0 level2 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level2 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const input3 =
'<p style="mso-list:l0 level3 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level3 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const input4 =
'<p style="mso-list:l0 level1 lfo1"><span>* </span>test</p>';
'<p style="mso-list:l0 level1 lfo1"><span style="mso-list:Ignore">* </span>test</p>';
const output =
'<ul><li>test<ul><li>test<ul><li>test</li></ul></li></ul></li><li>test</li></ul>';
expect(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ exports[`Blocks raw handling pasteHandler iframe-embed 1`] = `""`;

exports[`Blocks raw handling pasteHandler markdown 1`] = `"This is a heading with <em>italic</em><br>This is a paragraph with a <a href="https://w.org/">link</a>, <strong>bold</strong>, and <del>strikethrough</del>.<br>Preserve<br>line breaks please.<br>Lists<br>A<br>Bulleted Indented<br>List<br>One<br>Two<br>Three<br>Table<br>First Header<br>Second Header<br>Content from cell 1<br>Content from cell 2<br>Content in the first column<br>Content in the second column<br><br><br><br>Table with empty cells.<br>Quote<br>First<br>Second<br>Code<br>Inline <code>code</code> tags should work.<br><code>This is a code block.</code>"`;

exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title<br>&nbsp;<br>This is a subtitle<br>&nbsp;<br>This is a heading level 1<br>&nbsp;<br>This is a heading level 2<br>&nbsp;<br>This is a <strong>paragraph</strong> with a <a href="https://w.org/">link</a>.<br>&nbsp;<br>·&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; A<br>·&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Bulleted<br>o&nbsp;&nbsp; Indented<br>·&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; List<br>&nbsp;<br>1&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; One<br>2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Two<br>3&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Three<br>&nbsp;<br>One<br>Two<br>Three<br>1<br>2<br>3<br>I<br>II<br>III<br>&nbsp;<br>An image:<br>&nbsp;<br><img width="451" height="338" src="file:LOW-RES.png"><br><a href="#anchor">This is an anchor link</a> that leads to the next paragraph.<br><a id="anchor">This is the paragraph with the anchor.</a><br><a href="#nowhere">This is an anchor link</a> that leads nowhere.<br><a>This is a paragraph with an anchor with no link pointing to it.</a><br>This is a reference to a footnote<a href="#_ftn1" id="_ftnref1">[1]</a>.<br>This is a reference to an endnote<a href="#_edn1" id="_ednref1">[i]</a>.<br><br><br><a href="#_ftnref1" id="_ftn1">[1]</a> This is a footnote.<br><br><br><a href="#_ednref1" id="_edn1">[i]</a> This is an endnote."`;
exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title<br>&nbsp;<br>This is a subtitle<br>&nbsp;<br>This is a heading level 1<br>&nbsp;<br>This is a heading level 2<br>&nbsp;<br>This is a <strong>paragraph</strong> with a <a href="https://w.org/">link</a>.<br>&nbsp;<br>A<br>Bulleted<br>Indented<br>List<br>&nbsp;<br>One<br>Two<br>Three<br>&nbsp;<br>One<br>Two<br>Three<br>1<br>2<br>3<br>I<br>II<br>III<br>&nbsp;<br>An image:<br>&nbsp;<br><img width="451" height="338" src="file:LOW-RES.png"><br><a href="#anchor">This is an anchor link</a> that leads to the next paragraph.<br><a id="anchor">This is the paragraph with the anchor.</a><br><a href="#nowhere">This is an anchor link</a> that leads nowhere.<br><a>This is a paragraph with an anchor with no link pointing to it.</a><br>This is a reference to a footnote<a href="#_ftn1" id="_ftnref1">[1]</a>.<br>This is a reference to an endnote<a href="#_edn1" id="_ednref1">[i]</a>.<br><br><br><a href="#_ftnref1" id="_ftn1">[1]</a> This is a footnote.<br><br><br><a href="#_ednref1" id="_edn1">[i]</a> This is an endnote."`;

exports[`Blocks raw handling pasteHandler ms-word-list 1`] = `"<a>This is a headline?</a><br>This is a text:<br>One<br>Two<br>Three<br><a>Lorem Ipsum.</a><br>&nbsp;"`;

exports[`Blocks raw handling pasteHandler ms-word-online 1`] = `"This is a <em>heading</em>&nbsp;<br>This is a <strong>paragraph </strong>with a <a href="https://w.org/" target="_blank" rel="noreferrer noopener">link</a>.&nbsp;<br>A&nbsp;<br>Bulleted&nbsp;<br>Indented&nbsp;<br>List&nbsp;<br>&nbsp;<br>One&nbsp;<br>Two&nbsp;<br>Three&nbsp;<br><br>One&nbsp;<br>Two&nbsp;<br>Three&nbsp;<br>1&nbsp;<br>2&nbsp;<br>3&nbsp;<br>I&nbsp;<br>II&nbsp;<br>III&nbsp;<br>&nbsp;<br>An image:&nbsp;<br><img src="data:image/jpeg;base64,###">&nbsp;"`;

Expand Down
1 change: 1 addition & 0 deletions test/integration/blocks-raw-handling.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ describe( 'Blocks raw handling', () => {
'google-docs-table-with-comments',
'google-docs-with-comments',
'ms-word',
'ms-word-list',
'ms-word-styled',
'ms-word-online',
'evernote',
Expand Down
28 changes: 28 additions & 0 deletions test/integration/fixtures/documents/ms-word-list-in.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<h3><a name="_Hlk64885792">This is a headline?<o:p></o:p></a></h3>

<p class=MsoNormal><span style='mso-bookmark:_Hlk64885792'>This is a text:<o:p></o:p></span></p>

<p class=MsoListParagraphCxSpFirst style='text-indent:-18.0pt;mso-list:l0 level1 lfo1'><span
style='mso-bookmark:_Hlk64885792'><![if !supportLists]><span style='font-family:
Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family:Symbol'><span
style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
</span></span></span><![endif]>One <o:p></o:p></span></p>

<p class=MsoListParagraphCxSpMiddle style='text-indent:-18.0pt;mso-list:l0 level1 lfo1'><span
style='mso-bookmark:_Hlk64885792'><![if !supportLists]><span style='font-family:
Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family:Symbol'><span
style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
</span></span></span><![endif]>Two<o:p></o:p></span></p>

<p class=MsoListParagraphCxSpLast style='text-indent:-18.0pt;mso-list:l0 level1 lfo1'><span
style='mso-bookmark:_Hlk64885792'><![if !supportLists]><span style='font-family:
Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family:Symbol'><span
style='mso-list:Ignore'>·<span style='font:7.0pt "Times New Roman"'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
</span></span></span><![endif]>Three<o:p></o:p></span></p>

<span style='mso-bookmark:_Hlk64885792'></span>

<p class=MsoNormal><a name="_Hlk64885540">Lorem Ipsum. </a><o:p></o:p></p>

<p class=MsoNormal><span style='font-family:"Calibri",sans-serif;mso-ascii-theme-font:
minor-latin;mso-hansi-theme-font:minor-latin;mso-bidi-theme-font:minor-latin'><o:p>&nbsp;</o:p></span></p>
25 changes: 25 additions & 0 deletions test/integration/fixtures/documents/ms-word-list-out.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!-- wp:heading {"level":3} -->
<h3 class="wp-block-heading"><a>This is a headline?</a></h3>
<!-- /wp:heading -->

<!-- wp:paragraph -->
<p>This is a text:</p>
<!-- /wp:paragraph -->

<!-- wp:list -->
<ul><!-- wp:list-item -->
<li>One</li>
<!-- /wp:list-item -->

<!-- wp:list-item -->
<li>Two</li>
<!-- /wp:list-item -->

<!-- wp:list-item -->
<li>Three</li>
<!-- /wp:list-item --></ul>
<!-- /wp:list -->

<!-- wp:paragraph -->
<p><a>Lorem Ipsum.</a></p>
<!-- /wp:paragraph -->

0 comments on commit d505480

Please sign in to comment.