12
12
13
13
14
14
import java .io .Reader ;
15
- import java . lang . reflect . Field ;
16
-
17
- import org . jsoup . safety . Cleaner ;
18
- import org .jsoup .safety . Safelist ;
19
- import com . overzealous . remark . Options ;
20
- import com . overzealous . remark . Options . Tables ;
21
- import com . overzealous . remark . Remark ;
15
+ import com . vladsch . flexmark . html2md . converter . FlexmarkHtmlConverter ;
16
+ import com . vladsch . flexmark . parser . PegdownExtensions ;
17
+ import com . vladsch . flexmark . util . data . DataKey ;
18
+ import org .jsoup .Jsoup ;
19
+ import org . jsoup . nodes . Document ;
20
+ import org . jsoup . nodes . Element ;
21
+ import org . jsoup . select . Elements ;
22
22
import org .slf4j .Logger ;
23
23
import org .slf4j .LoggerFactory ;
24
24
30
30
public class JavaDoc2MarkdownConverter extends AbstractJavaDocConverter {
31
31
private static final Logger LOGGER = LoggerFactory .getLogger (JavaDoc2MarkdownConverter .class );
32
32
33
- private static Remark remark ;
34
-
35
- static {
36
- Options options = new Options ();
37
- options .tables = Tables .MULTI_MARKDOWN ;
38
- options .hardwraps = true ;
39
- options .inlineLinks = true ;
40
- options .autoLinks = true ;
41
- options .reverseHtmlSmartPunctuation = true ;
42
- remark = new Remark (options );
43
- //Stop remark from stripping file and jdt protocols in an href
44
- try {
45
- Field cleanerField = Remark .class .getDeclaredField ("cleaner" );
46
- cleanerField .setAccessible (true );
47
-
48
- Cleaner c = (Cleaner ) cleanerField .get (remark );
49
-
50
- Field safelistField = Cleaner .class .getDeclaredField ("safelist" );
51
- safelistField .setAccessible (true );
33
+ private static final String LINE_SEPARATOR = "\n " ;
52
34
53
- Safelist s = (Safelist ) safelistField .get (c );
54
-
55
- s .addProtocols ("a" , "href" , "file" , "jdt" );
56
- s .addProtocols ("img" , "src" , "file" );
57
- } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e ) {
58
- LOGGER .error ("Unable to modify jsoup to include file and jdt protocols" , e );
59
- }
60
- }
35
+ final static public DataKey <Integer > HTML_EXTENSIONS = new DataKey <>("HTML_EXTENSIONS" , 0
36
+ //| Extensions.ABBREVIATIONS
37
+ //| Extensions.EXTANCHORLINKS /*| Extensions.EXTANCHORLINKS_WRAP*/
38
+ //| Extensions.AUTOLINKS
39
+ //| Extensions.DEFINITIONS
40
+ | PegdownExtensions .FENCED_CODE_BLOCKS
41
+ //| Extensions.FORCELISTITEMPARA
42
+ //| Extensions.HARDWRAPS
43
+ //| Extensions.ATXHEADERSPACE
44
+ //| Extensions.QUOTES
45
+ //| Extensions.SMARTS
46
+ //| Extensions.RELAXEDHRULES
47
+ //| Extensions.STRIKETHROUGH
48
+ //| Extensions.SUPPRESS_HTML_BLOCKS
49
+ //| Extensions.SUPPRESS_INLINE_HTML
50
+ //| Extensions.TABLES
51
+ //| Extensions.TASKLISTITEMS
52
+ //| Extensions.WIKILINKS
53
+ //| Extensions.TRACE_PARSER
54
+ );
55
+ private static final FlexmarkHtmlConverter CONVERTER = FlexmarkHtmlConverter .builder ().build ();
61
56
62
57
public JavaDoc2MarkdownConverter (Reader reader ) {
63
58
super (reader );
@@ -68,7 +63,53 @@ public JavaDoc2MarkdownConverter(String javadoc) {
68
63
}
69
64
70
65
@ Override
71
- String convert (String rawHtml ) {
72
- return remark .convert (rawHtml );
66
+ public String convert (String html ) {
67
+ Document document = Jsoup .parse (html );
68
+ //Add missing table headers if necessary, else most Markdown renderers will crap out
69
+ document .select ("table" ).forEach (JavaDoc2MarkdownConverter ::addMissingTableHeaders );
70
+
71
+ String markdown = CONVERTER .convert (document );
72
+ if (markdown .endsWith (LINE_SEPARATOR )) {// FlexmarkHtmlConverter keeps adding an extra line
73
+ markdown = markdown .substring (0 , markdown .length () - LINE_SEPARATOR .length ());
74
+ }
75
+
76
+ return markdown ;
77
+ }
78
+
79
+ /**
80
+ * Adds a new row header if the given table doesn't have any.
81
+ *
82
+ * @param table
83
+ * the HTML table to check for a header
84
+ */
85
+ private static void addMissingTableHeaders (Element table ) {
86
+ int numCols = 0 ;
87
+ for (Element child : table .children ()) {
88
+ if ("thead" .equals (child .nodeName ())) {
89
+ // Table already has a header, nothing else to do
90
+ return ;
91
+ }
92
+ if ("tbody" .equals (child .nodeName ())) {
93
+ Elements rows = child .getElementsByTag ("tr" );
94
+ if (!rows .isEmpty ()) {
95
+ for (Element row : rows ) {
96
+ int colSize = row .getElementsByTag ("td" ).size ();
97
+ //Keep the biggest column size
98
+ if (colSize > numCols ) {
99
+ numCols = colSize ;
100
+ }
101
+ }
102
+ }
103
+ }
104
+ }
105
+ if (numCols > 0 ) {
106
+ //Create a new header row based on the number of columns already found
107
+ Element newHeader = new Element ("tr" );
108
+ for (int i = 0 ; i < numCols ; i ++) {
109
+ newHeader .appendChild (new Element ("th" ));
110
+ }
111
+ //Insert header row in 1st position in the table
112
+ table .insertChildren (0 , newHeader );
113
+ }
73
114
}
74
115
}
0 commit comments