diff --git a/lib/Service/FilesService.php b/lib/Service/FilesService.php
index a95a2bd..7e50acb 100644
--- a/lib/Service/FilesService.php
+++ b/lib/Service/FilesService.php
@@ -898,6 +898,18 @@ private function parseMimeTypeText(string $mimeType, string $extension, string &
throw new KnownFileMimeTypeException();
}
+ // 20220219 Parse XML files as TEXT files
+ if (substr($mimeType, 0, 15) === 'application/xml') {
+ $parsed = self::MIMETYPE_TEXT;
+ throw new KnownFileMimeTypeException();
+ }
+
+ // 20220219 Parse .drawio file
+ if ($extension === 'drawio') {
+ $parsed = self::MIMETYPE_TEXT;
+ throw new KnownFileMimeTypeException();
+ }
+
$textMimes = [
'application/epub+zip'
];
@@ -1039,14 +1051,77 @@ private function extractContentFromFilePDF(FilesDocument $document, File $file)
return;
}
- try {
- $document->setContent(
- base64_encode($file->getContent()), IIndexDocument::ENCODED_BASE64
- );
- } catch (NotPermittedException | LockedException $e) {
+ // 20220219 Inflate drawio file
+ if ( $file->getExtension() === 'drawio') {
+ $content = $file->getContent();
+
+ try {
+ $xml = simplexml_load_string($content);
+
+ // Initialize $content
+ $content = '';
+
+ foreach ($xml->diagram as $child) {
+ $deflated_content = (string)$child;
+ $base64decoded = base64_decode($deflated_content);
+ $urlencoded_content = gzinflate($base64decoded);
+ $urldecoded_content = urldecode($urlencoded_content);
+
+					// Remove the style attribute of image shapes (style="shape=image...")
+ $diagram_str = preg_replace('/style=\"shape=image[^"]*\"/', '', $urldecoded_content);
+
+ // Construct XML
+ $diagram_xml = simplexml_load_string($diagram_str);
+ $content = $content . ' ' . $this->readDrawioXmlValue($diagram_xml);
+ }
+
+ } catch (\Throwable $t) {
+ }
+
+ try {
+ $document->setContent(
+ // 20220219 Pass content of inflated drawio graph xml
+ base64_encode($content), IIndexDocument::ENCODED_BASE64
+ );
+ } catch (NotPermittedException | LockedException $e) {
+ }
+ } else {
+ try {
+ $document->setContent(
+ base64_encode($file->getContent()), IIndexDocument::ENCODED_BASE64
+ );
+ } catch (NotPermittedException | LockedException $e) {
+ }
}
}
+	/**
+	 * Collect the indexable text of a draw.io XML element and all of its
+	 * descendants.
+	 *
+	 * For each element, the 'value' attribute and the element's own text are
+	 * taken when they are not blank, stripped of HTML tags, and appended
+	 * separated by spaces. Child elements are then visited recursively in
+	 * document order and their text is appended the same way.
+	 *
+	 * @param \SimpleXMLElement $element element to read (the caller passes the
+	 *                                   root of an inflated diagram)
+	 *
+	 * @return string space separated text fragments; the string may start with
+	 *                spaces and contain runs of spaces
+	 */
+	private function readDrawioXmlValue(\SimpleXMLElement $element): string {
+		$str = '';
+
+		// A missing 'value' attribute casts to '', so the non-blank check below
+		// also covers the "attribute absent" case.
+		$fragments = [(string)$element['value'], (string)$element];
+		foreach ($fragments as $fragment) {
+			$fragment = trim($fragment);
+			if ($fragment === '') {
+				continue;
+			}
+
+			// Strip HTML tags fragment by fragment: running the pattern over the
+			// concatenated text would treat a '<' in one label and a '>' in a
+			// later label as a single tag and drop everything in between.
+			// preg_replace() returns null on a PCRE error; keep the raw text then.
+			$str .= ' ' . (preg_replace('/<[^>]*>/', ' ', $fragment) ?? $fragment);
+		}
+
+		// Text returned for children has already been stripped of tags.
+		foreach ($element->children() as $child) {
+			$str .= ' ' . $this->readDrawioXmlValue($child);
+		}
+
+		return $str;
+	}
/**
* @param FilesDocument $document
diff --git a/test.drawio b/test.drawio
new file mode 100644
index 0000000..67dda3f
--- /dev/null
+++ b/test.drawio
@@ -0,0 +1 @@
+jZLBToQwEIafhqMJ0Mh6FtfdiydMPDftSJsUStphAZ/eItMFJCaemPnm73T6Dwkrm/HieKferAST5KkcE/aS5HlWFHn4zGRayImdFlA7LUm0gkp/AcGUaK8l+J0QrTWouz0Utm1B4I5x5+ywl31as7+14zUcQCW4OdIPLVEt9OkxXfkVdK3izVlKlYZHMQGvuLTDBrFzwkpnLS5RM5ZgZvOiL8u51z+q98EctPifA7SIGzc9ve0dPD5UvRDgPbXzOMWXO9u3EubDacKeB6URqo6LuTqEXQemsDEhy0J4HIYa3sAhjBtEw13ANoBuChKqsoKMmn7lw+p7Fs1UG8+jjtOq63vr1Y0QkCExXY3/qW1+X3b+Bg==jZJNb4MwDIZ/DcdKQDS6XUdpd1h3odLOEfFIpkBQMAX66xeGU0DVpJ1wHn/y2gFLq+FkeSPPRoAO4lAMATsEcRwlSew+Exlnsmf7GZRWCQpaQK5uQDAk2ikB7SYQjdGomi0sTF1DgRvGrTX9NuzL6G3XhpfwAPKC60f6qQTKmT4/hQt/A1VK3zkKyVNxH0yglVyYfoVYFrDUGoOzVQ0p6Ek8r8ucd/zDex/MQo3/SagP7IKnl/fzR6ay/Ijft+S6oypXrjv64Qu0uMu7ooC2pd21OHo5rOlqAVPFMGCvvVQIecOLydu7A3BMYqXdK3Lm44S+HViEYYVo4hOYCtCOLoS8LCH16HziPb37ZRmRV1iuFuHzOO2/vJdeJHIGqeSfyzZ+faubZtkP
\ No newline at end of file
diff --git a/test.xml b/test.xml
new file mode 100644
index 0000000..33f9c06
--- /dev/null
+++ b/test.xml
@@ -0,0 +1,4 @@
+
+Test-key-success
+Test-value-success
+
\ No newline at end of file