From ed42df627454dbeeff96f309aa1f65d290848cb0 Mon Sep 17 00:00:00 2001
From: Masahiro Minami
Date: Sun, 13 Mar 2022 23:24:10 +0900
Subject: [PATCH 1/2] Add XML, Draw.IO support

Signed-off-by: Masahiro Minami
---
 lib/Service/FilesService.php | 110 ++++++++++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/lib/Service/FilesService.php b/lib/Service/FilesService.php
index a95a2bd..7e50acb 100644
--- a/lib/Service/FilesService.php
+++ b/lib/Service/FilesService.php
@@ -898,6 +898,18 @@ private function parseMimeTypeText(string $mimeType, string $extension, string &
 			throw new KnownFileMimeTypeException();
 		}
 
+		// 20220219 Parse XML files as TEXT files
+		if (substr($mimeType, 0, 15) === 'application/xml') {
+			$parsed = self::MIMETYPE_TEXT;
+			throw new KnownFileMimeTypeException();
+		}
+
+		// 20220219 Parse .drawio file
+		if ($extension === 'drawio') {
+			$parsed = self::MIMETYPE_TEXT;
+			throw new KnownFileMimeTypeException();
+		}
+
 		$textMimes = [
 			'application/epub+zip'
 		];
@@ -1040,13 +1052,109 @@ private function extractContentFromFilePDF(FilesDocument $document, File $file)
 		}
 
 		try {
+			$content = $file->getContent();
+
+			// 20220219 Index the text of a drawio file instead of its raw XML
+			if ($file->getExtension() === 'drawio') {
+				$content = $this->extractDrawioText($content);
+			}
+
 			$document->setContent(
-				base64_encode($file->getContent()), IIndexDocument::ENCODED_BASE64
+				base64_encode($content), IIndexDocument::ENCODED_BASE64
 			);
 		} catch (NotPermittedException | LockedException $e) {
 		}
 	}
 
+	/**
+	 * 20220219 Extract the text to index from the content of a Draw.io file.
+	 *
+	 * A <diagram> page holds its graph either as regular child nodes or as a
+	 * base64 encoded, deflated and URL encoded XML string. Pages that cannot
+	 * be decoded are skipped so that the other pages still get indexed.
+	 *
+	 * @param string $raw
+	 *
+	 * @return string empty when $raw cannot be parsed as XML
+	 */
+	private function extractDrawioText(string $raw): string {
+		// Collect libxml errors instead of raising a warning per broken file.
+		$previous = libxml_use_internal_errors(true);
+
+		try {
+			$xml = simplexml_load_string($raw, \SimpleXMLElement::class, LIBXML_NONET);
+			if ($xml === false) {
+				// Not parseable as XML: nothing to index
+				return '';
+			}
+
+			$content = '';
+			foreach ($xml->diagram as $page) {
+				if ($page->count() > 0) {
+					// Uncompressed page, the graph is already part of $xml
+					$content .= ' ' . $this->readDrawioXmlValue($page);
+					continue;
+				}
+
+				$deflated = base64_decode(trim((string)$page));
+				if ($deflated === false || $deflated === '') {
+					continue;
+				}
+
+				$inflated = gzinflate($deflated);
+				if ($inflated === false) {
+					continue;
+				}
+
+				// Remove image tag
+				$diagramStr = preg_replace(
+					'/style=\"shape=image[^"]*\"/', '', urldecode($inflated)
+				);
+
+				// Construct XML
+				$diagramXml = simplexml_load_string(
+					(string)$diagramStr, \SimpleXMLElement::class, LIBXML_NONET
+				);
+				if ($diagramXml !== false) {
+					$content .= ' ' . $this->readDrawioXmlValue($diagramXml);
+				}
+			}
+
+			return $content;
+		} finally {
+			libxml_clear_errors();
+			libxml_use_internal_errors($previous);
+		}
+	}
+
+	/**
+	 * 20220220 Read Draw.io XML elements and return a space separated
+	 * string, stripped of HTML tags, to be indexed.
+	 *
+	 * @param \SimpleXMLElement $element
+	 *
+	 * @return string
+	 */
+	private function readDrawioXmlValue(\SimpleXMLElement $element): string {
+		$str = '';
+
+		$value = trim((string)$element['value']);
+		if ($value !== '') {
+			$str .= ' ' . $value;
+		}
+
+		$text = trim((string)$element);
+		if ($text !== '') {
+			$str .= ' ' . $text;
+		}
+
+		foreach ($element->children() as $child) {
+			$str .= ' ' . $this->readDrawioXmlValue($child);
+		}
+
+		// Strip HTML tags, keep the raw text if the pattern cannot be applied
+		return preg_replace('/<[^>]*>/', ' ', $str) ?? $str;
+	}
 
 	/**
 	 * @param FilesDocument $document
From a6a465b88667690d1a17db5ef05a2eb69e845c08 Mon Sep 17 00:00:00 2001
From: Masahiro Minami
Date: Sat, 14 May 2022 10:30:50 +0900
Subject: [PATCH 2/2] Added test files for xml and drawio support.

Signed-off-by: Masahiro Minami --- test.drawio | 1 + test.xml | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 test.drawio create mode 100644 test.xml diff --git a/test.drawio b/test.drawio new file mode 100644 index 0000000..67dda3f --- /dev/null +++ b/test.drawio @@ -0,0 +1 @@ +jZLBToQwEIafhqMJ0Mh6FtfdiydMPDftSJsUStphAZ/eItMFJCaemPnm73T6Dwkrm/HieKferAST5KkcE/aS5HlWFHn4zGRayImdFlA7LUm0gkp/AcGUaK8l+J0QrTWouz0Utm1B4I5x5+ywl31as7+14zUcQCW4OdIPLVEt9OkxXfkVdK3izVlKlYZHMQGvuLTDBrFzwkpnLS5RM5ZgZvOiL8u51z+q98EctPifA7SIGzc9ve0dPD5UvRDgPbXzOMWXO9u3EubDacKeB6URqo6LuTqEXQemsDEhy0J4HIYa3sAhjBtEw13ANoBuChKqsoKMmn7lw+p7Fs1UG8+jjtOq63vr1Y0QkCExXY3/qW1+X3b+Bg==jZJNb4MwDIZ/DcdKQDS6XUdpd1h3odLOEfFIpkBQMAX66xeGU0DVpJ1wHn/y2gFLq+FkeSPPRoAO4lAMATsEcRwlSew+Exlnsmf7GZRWCQpaQK5uQDAk2ikB7SYQjdGomi0sTF1DgRvGrTX9NuzL6G3XhpfwAPKC60f6qQTKmT4/hQt/A1VK3zkKyVNxH0yglVyYfoVYFrDUGoOzVQ0p6Ek8r8ucd/zDex/MQo3/SagP7IKnl/fzR6ay/Ijft+S6oypXrjv64Qu0uMu7ooC2pd21OHo5rOlqAVPFMGCvvVQIecOLydu7A3BMYqXdK3Lm44S+HViEYYVo4hOYCtCOLoS8LCH16HziPb37ZRmRV1iuFuHzOO2/vJdeJHIGqeSfyzZ+faubZtkP \ No newline at end of file diff --git a/test.xml b/test.xml new file mode 100644 index 0000000..33f9c06 --- /dev/null +++ b/test.xml @@ -0,0 +1,4 @@ + +Test-key-success +Test-value-success + \ No newline at end of file