|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file should be put in the islandora modules plugins directory |
| 5 | + * typical usage: drush -u 1 islandora_lowercase_hocr path_to_query_file TRUE |
| 6 | + * the above would give you an interactive update for each book. If TRUE is changed to FALSE it will run against all |
| 7 | + * books returned from the query |
| 8 | + * |
| 9 | + * If this is being run from a multi site env. you will probably have to use the --uri switch |
| 10 | + * 'drush -u 1 --uri=http://localhost islandora_lowercase_hocr /path/to/query.txt TRUE'. You should also be in the subsites |
| 11 | + * sites directory or a subdirectory of it. |
| 12 | + * |
| 13 | + */ |
| 14 | + |
/**
 * Drush command hook: declares the islandora_lowercase_hocr command.
 *
 * @return array
 *   Command definitions keyed by command name. The single entry describes
 *   the command's arguments, an example invocation, its alias and the
 *   bootstrap level it requires.
 */
function islandora_lowercase_hocr_drush_command() {
  $arguments = array(
    'query_file' => 'The path to a plain text file that contains an itql query. This query should return a list of books (pids)',
    'interactive' => 'if TRUE then you will be asked to confirm the update for each book object',
  );

  $command = array();
  $command['description'] = "updates a books pages so the hocr datastream is all lower case";
  $command['arguments'] = $arguments;
  $command['examples'] = array(
    'drush -u 1 --uri=http://localhost islandora_lowercase_hocr /path/to/query.txt TRUE',
  );
  $command['aliases'] = array('islandorlowerhocr');
  // Login-level bootstrap so a user id can be passed on the command line
  // using drush -u.
  $command['bootstrap'] = DRUSH_BOOTSTRAP_DRUPAL_LOGIN;

  return array('islandora_lowercase_hocr' => $command);
}
| 35 | + |
/**
 * Drush command callback for the islandora_lowercase_hocr command.
 *
 * Prints the current working directory, checks that a query file path was
 * supplied and, if so, hands both arguments to
 * islandora_lowercase_hocr_doAction().
 *
 * Both parameters are optional so that running the command without
 * arguments reaches the "no query_file found" message instead of failing on
 * a missing required argument.
 *
 * @param string $query_file
 *   Path to a text file that contains an itql query.
 * @param string $interactive
 *   Passed through unchanged to islandora_lowercase_hocr_doAction().
 *   Defaults to 'FALSE' when omitted.
 */
function drush_islandora_lowercase_hocr($query_file = NULL, $interactive = 'FALSE') {
  drush_print('Current working directory ' . getcwd());

  // empty() also rejects an empty-string path, not only a missing argument.
  if (empty($query_file)) {
    drush_print(" no query_file found");
    return;
  }

  drush_print(" using query file " . $query_file);
  islandora_lowercase_hocr_doAction($query_file, $interactive);
}
| 58 | + |
/**
 * Runs the query in $query_file and updates the hocr stream of every page of
 * each book it returns.
 *
 * The query text is read from the file, executed, and the resulting PIDs are
 * printed. The user is then asked once to confirm the whole run; declining
 * aborts via drush_die(). In interactive mode every book is additionally
 * confirmed on its own before islandora_update_books_hocr_streams() is
 * called for it.
 *
 * NOTE(review): islandora_run_query() and islandora_sparql_results_as_array()
 * only appear inside comments in this file, so they must be provided by
 * another loaded file - confirm before running.
 *
 * @param string $query_file
 *   Path to a plain text file containing an itql query.
 * @param string|bool $interactive
 *   Boolean TRUE or the string 'TRUE' (any letter case) enables per-book
 *   confirmation; anything else processes every book without asking again.
 */
function islandora_lowercase_hocr_doAction($query_file, $interactive) {
  module_load_include('inc', 'fedora_repository', 'api/fedora_item');

  // file_get_contents() signals failure with FALSE, which isset() cannot
  // detect; test for it explicitly and also reject a blank file.
  $query = is_readable($query_file) ? file_get_contents($query_file) : FALSE;
  if ($query === FALSE || trim($query) === '') {
    drush_die('no query found');
  }
  drush_print($query);

  $results = islandora_run_query($query);
  $objects = islandora_sparql_results_as_array($results);
  foreach ($objects as $object) {
    drush_print($object);
  }

  $num = count($objects);
  if (!drush_confirm(dt('are you sure you want to update all pages in @num of books?', array('@num' => $num)))) {
    drush_die('Aborting.');
  }

  $confirm_each = ($interactive === TRUE) || strcasecmp((string) $interactive, 'TRUE') === 0;
  foreach ($objects as $object) {
    // In interactive mode a declined book is skipped, not treated as abort.
    if ($confirm_each && !drush_confirm(dt('update all page hocr streams in this book @pid ?', array('@pid' => $object)))) {
      continue;
    }
    islandora_update_books_hocr_streams($object);
  }
}
| 100 | + |
| 101 | +/** |
| 102 | + * returns an array of pids |
| 103 | + * @todo pull this up into an api |
| 104 | + * @param SimpleXMLElement $content |
| 105 | + * @return array |
| 106 | + */ |
| 107 | +/** |
| 108 | + function islandora_sparql_results_as_array($content) { |
| 109 | + $content = new SimpleXMLElement($content); |
| 110 | +
|
| 111 | + $resultsarray = array(); |
| 112 | + foreach ($content->results->result as $result) { |
| 113 | + $resultsarray[] = substr($result->object->attributes()->uri, 12); // Remove 'info:fedora/'. |
| 114 | + } |
| 115 | + return $resultsarray; |
| 116 | + } */ |
| 117 | + |
/**
 * Updates the hocr stream of every page belonging to a book.
 *
 * Looks up the book's page PIDs and calls
 * islandora_update_page_hocr_streams() for each one, pausing briefly between
 * pages.
 *
 * NOTE(review): islandora_update_book_rels_get_page_pids() only appears
 * inside a comment in this file, so it must be provided by another loaded
 * file - confirm before running.
 *
 * @param string $book_pid
 *   PID of the book whose pages should be processed.
 */
function islandora_update_books_hocr_streams($book_pid) {
  module_load_include('inc', 'islandora_book', 'book_pack_utils');
  module_load_include('inc', 'fedora_repository', 'api/fedora_item');
  module_load_include('inc', 'islandora_fedora_api', 'islandora_fedora_api.raw');

  $page_pids = islandora_update_book_rels_get_page_pids($book_pid);
  if (empty($page_pids)) {
    // Nothing to iterate over; say so rather than finishing silently.
    drush_print('no pages found for ' . $book_pid);
    return;
  }

  // 1-based position of the page within the returned list.
  $page_num = 1;
  foreach ($page_pids as $pid) {
    islandora_update_page_hocr_streams($book_pid, $pid, $page_num++);
    // Give the server some rest between calls (0.2 seconds).
    time_nanosleep(0, 200000000);
  }
}
| 158 | + |
/**
 * Rewrites one page's ENCODED_OCR datastream through the lower-casing XSLT.
 *
 * Fetches the page's current ENCODED_OCR content, transforms it with
 * xslt/hocrToLowerhocr.xsl from the islandora_bookviewer module directory and
 * writes the result back to the same datastream. Problems are reported with
 * drush_print() and the page is skipped, so one bad page does not stop the
 * run. The only exception is an exception raised while fetching the
 * datastream, which ends the script as before.
 *
 * @param string $book_pid
 *   PID of the parent book. Not needed for the hocr update; retained so
 *   existing callers keep working.
 * @param string $pid
 *   PID of the page object to update.
 * @param int $page_number
 *   Position of the page within the book. Not needed for the hocr update;
 *   retained so existing callers keep working.
 */
function islandora_update_page_hocr_streams($book_pid, $pid, $page_number) {
  $item = new Fedora_Item($pid);
  try {
    $proc = new XsltProcessor();
    $hocr = $item->get_datastream_dissemination('ENCODED_OCR');
  }
  catch (Exception $e) {
    drush_print ('error loading xslt');
    exit();
  }

  $path = drupal_get_path('module', 'islandora_bookviewer');
  $stylesheet = new DomDocument();
  $error = FALSE;
  if (!$stylesheet->load($path . '/xslt/hocrToLowerhocr.xsl')) {
    drush_print ('Error loading style sheet when processing '.$pid);
    $error = TRUE;
  }
  if (empty($hocr)) {
    drush_print('Error loading hocr data when processiong '.$pid);
    $error = TRUE;
  }
  if ($error) {
    $item->forget();
    return;
  }

  // Each of the next three steps reports failure with FALSE; stop at the
  // first one so saveXML() is never called on a non-object.
  $input = new DomDocument();
  if (!$input->loadXML($hocr)) {
    drush_print('Error parsing hocr data when processing ' . $pid);
    $item->forget();
    return;
  }
  if (!$proc->importStylesheet($stylesheet)) {
    drush_print('Error importing style sheet when processing ' . $pid);
    $item->forget();
    return;
  }
  $newdom = $proc->transformToDoc($input);
  if (!$newdom) {
    drush_print('Error transforming hocr data when processing ' . $pid);
    $item->forget();
    return;
  }

  $content = $newdom->saveXML();
  $log_message = 'Modified by Islandora API using islandora_lowercase_hocr drush script';
  $value = $item->modify_datastream($content, 'ENCODED_OCR', 'ENCODED_OCR', 'text/xml', FALSE, $log_message, TRUE);
  if (isset($value)) {
    drush_print('successfully updated ENCODED_OCR for ' . $pid);
  }
  else {
    drush_print('Error updating ENCODED_OCR for ' . $pid);
  }
  $item->forget();
}
| 213 | + |
| 214 | +/** |
| 215 | + * |
| 216 | + * Returns the pids of all pages associated with a book object |
| 217 | + * @param string $book_pid |
| 218 | + * @return string |
| 219 | + */ |
| 220 | +/* |
| 221 | + function islandora_update_book_rels_get_page_pids($book_pid) { |
| 222 | + //return get_sorted_pages($book_pid); //not sure why we have two different functions |
| 223 | + $query = <<<XML |
| 224 | + select \$object \$title from <#ri> |
| 225 | + where (\$object <dc:title> \$title |
| 226 | + and \$object <fedora-rels-ext:isMemberOf> <info:fedora/$book_pid> |
| 227 | + and \$object <fedora-model:state> <info:fedora/fedora-system:def/model#Active>) |
| 228 | + order by \$title |
| 229 | + XML; |
| 230 | +
|
| 231 | +
|
| 232 | + module_load_include('inc', 'fedora_repository', 'CollectionClass'); |
| 233 | + $collection = new CollectionClass($book_pid); |
| 234 | + $contents = $collection->getRelatedItems($book_pid, $query); |
| 235 | + $results_xml = simplexml_load_string($contents); |
| 236 | + $resultsarray = array(); |
| 237 | + $book_pids = array(); |
| 238 | + foreach ($results_xml->results->result as $result) { |
| 239 | + $book_pids[] = str_replace('info:fedora/', '', $result->object->attributes()->uri); |
| 240 | + } |
| 241 | + return $book_pids; |
| 242 | + } */ |
| 243 | + |
| 244 | +/** |
| 245 | + * @todo pull this function up into an api and remove from here |
| 246 | + * executes a supplied itql query |
| 247 | + * @param string $query |
| 248 | + * @return type |
| 249 | + */ |
| 250 | +/* |
| 251 | +function islandora_run_query($query) { |
| 252 | + module_load_include('inc', 'fedora_repository', 'CollectionClass'); |
| 253 | + $cc = new CollectionClass(); |
| 254 | + $results = $cc->getRelatedItems(NULL, $query, 1000000); |
| 255 | + if (isset($results)) {//still doesn't tell us if these are valid results |
| 256 | + return $results; |
| 257 | + } |
| 258 | + else { |
| 259 | + drush_print('Error get related items, relationships cannot be updated'); |
| 260 | + } |
| 261 | +}*/ |
| 262 | + |
| 263 | +?> |
0 commit comments