Skip to content
This repository was archived by the owner on Nov 25, 2023. It is now read-only.

Commit

Permalink
implement proxied image search #1
Browse files Browse the repository at this point in the history
  • Loading branch information
ghost committed May 4, 2023
1 parent baf78e2 commit 6b18202
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 13 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Could be enabled or disabled by `API_SEARCH_ENABLED` option
```
GET action=search - required
GET query={string} - optional, search request, empty if not provided
GET type={string} - optional, search type, image|default or empty
GET page={int} - optional, search results page, 1 if not provided
GET mode=SphinxQL - optional, enable extended SphinxQL syntax
```
Expand Down Expand Up @@ -142,7 +143,7 @@ GET m=SphinxQL
* [x] Add robots.txt support (Issue #2)
* [ ] Improve yggdrasil links detection, add .ygg domain zone support
* [ ] Make page description visible - based on the cached content dump, when website description tag not available, add condition highlights
* [ ] Images search (basically implemented but requires testing and some performance optimization)
* [x] Images search (basically implemented but requires testing and some performance optimization)
* [x] Index cleaner
* [ ] Crawl queue balancer that adapts to the available CPU
* [ ] Implement a smart queue algorithm that indexes the homepages of new sites with higher priority
Expand Down
34 changes: 34 additions & 0 deletions library/mysql.php
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ public function getHostImageToHostPage(int $hostImageId, int $hostPageId) {
return $query->fetch();
}

/**
 * Fetch every `hostImageToHostPage` relation row that belongs to one image.
 *
 * @param int $hostImageId Value matched against the `hostImageId` column.
 *
 * @return mixed Whatever `fetchAll()` yields on the prepared statement;
 *               the row shape depends on the connection's fetch mode,
 *               which is configured outside this method.
 */
public function getHostImageHostPages(int $hostImageId) {

    $statement = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
    $statement->execute([$hostImageId]);

    return $statement->fetchAll();
}

public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {

$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
Expand Down Expand Up @@ -346,6 +355,31 @@ public function getFoundHostPage(int $hostPageId) {
return $query->fetch();
}

/**
 * Load the data needed to render one image search result.
 *
 * Selects the image `uri` and `rank`, the owning host's `scheme`, `name`
 * and `port`, and a `description` column built by GROUP_CONCAT over the
 * "alt | title" pairs stored in `hostImageDescription` for this image.
 *
 * @param int $hostImageId Value matched against `hostImage`.`hostImageId`.
 *
 * @return mixed Result of `fetch()` on the prepared statement (a single row;
 *               its shape depends on the connection's fetch mode).
 */
public function getFoundHostImage(int $hostImageId) {

    $sql = 'SELECT `hostImage`.`uri`,
`hostImage`.`rank`,
`host`.`scheme`,
`host`.`name`,
`host`.`port`,
(SELECT GROUP_CONCAT(CONCAT_WS(" | ", `hostImageDescription`.`alt`, `hostImageDescription`.`title`))
FROM `hostImageDescription`
WHERE `hostImageDescription`.`hostImageId` = `hostImage`.`hostImageId`) AS `description`
FROM `hostImage`
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)
WHERE `hostImage`.`hostImageId` = ?
LIMIT 1';

    $statement = $this->_db->prepare($sql);
    $statement->execute([$hostImageId]);

    return $statement->fetch();
}

public function addHostPage(int $hostId,
int $crc32uri,
string $uri,
Expand Down
28 changes: 28 additions & 0 deletions library/sphinxql.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,25 @@ public function searchHostPages(string $keyword, int $start, int $limit, int $ma
return $query->fetchAll();
}

/**
 * Run a full-text search against the `hostImage` Sphinx index.
 *
 * Results are ordered by `rank` first and by the Sphinx match weight second;
 * the weight is also exposed on every row as `weight`.
 *
 * @param string $keyword    Expression bound to MATCH(?).
 * @param int    $start      Zero-based offset of the first row to return.
 * @param int    $limit      Maximum number of rows to return.
 * @param int    $maxMatches Value for the `max_matches` option (raised to 1 when lower).
 *
 * @return mixed Result of `fetchAll()` on the prepared statement.
 */
public function searchHostImages(string $keyword, int $start, int $limit, int $maxMatches) {

    // Sphinx needs max_matches >= 1 and an offset inside [0, max_matches - 1].
    // The previous clamp only triggered for $start > $maxMatches, so an offset
    // equal to max_matches (e.g. the page right after the last one) or a
    // negative offset produced a query error instead of a result set.
    $maxMatches = max(1, $maxMatches);
    $offset     = max(0, min($start, $maxMatches - 1));

    // LIMIT / OPTION values are integers by signature, so concatenating them
    // into the statement is safe; the keyword itself stays a bound parameter.
    $query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight`
FROM `hostImage`
WHERE MATCH(?)
ORDER BY `rank` DESC, WEIGHT() DESC
LIMIT ' . $offset . ',' . (int) $limit . '
OPTION `max_matches`=' . $maxMatches);

    $query->execute([$keyword]);

    return $query->fetchAll();
}

public function searchHostPagesTotal(string $keyword) {

$query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE MATCH(?)');
Expand All @@ -38,4 +57,13 @@ public function searchHostPagesTotal(string $keyword) {

return $query->fetch()->total;
}

/**
 * Count the `hostImage` index documents that match a search expression.
 *
 * @param string $keyword Expression bound to MATCH(?).
 *
 * @return mixed The `total` column of the single COUNT(*) row.
 */
public function searchHostImagesTotal(string $keyword) {

    $statement = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostImage` WHERE MATCH(?)');
    $statement->execute([$keyword]);

    $row = $statement->fetch();

    return $row->total;
}
}
36 changes: 30 additions & 6 deletions public/api.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,48 @@


// Filter request data
// Only plain string parameters are accepted: array-style input such as
// ?type[]=x falls back to the default instead of reaching the filter.
$type  = (!empty($_GET['type'])  && is_string($_GET['type']))  ? Filter::url($_GET['type'])  : 'page';
$mode  = (!empty($_GET['mode'])  && is_string($_GET['mode']))  ? Filter::url($_GET['mode'])  : 'default';
$query = (!empty($_GET['query']) && is_string($_GET['query'])) ? Filter::url($_GET['query']) : '';

// Pages are 1-based; zero or negative values would turn into a negative
// result offset further down, so they are raised to the first page.
$page  = !empty($_GET['page']) ? max(1, (int) $_GET['page']) : 1;

// Make search request
$sphinxResultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($query, $mode));
$sphinxResults = $sphinx->searchHostPages(Filter::searchQuery($query, $mode), $page * API_SEARCH_PAGINATION_RESULTS_LIMIT - API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal);
// Resolve the filtered search expression and the zero-based result offset once;
// both are identical for the "total" and the "results" request of either type.
// NOTE(review): assumes Filter::searchQuery() has no side effects — confirm.
$sphinxKeyword = Filter::searchQuery($query, $mode);
$sphinxOffset  = ($page - 1) * API_SEARCH_PAGINATION_RESULTS_LIMIT;

// Make image search request
if ($type === 'image') {

    $sphinxResultsTotal = $sphinx->searchHostImagesTotal($sphinxKeyword);
    $sphinxResults      = $sphinx->searchHostImages($sphinxKeyword, $sphinxOffset, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal);

// Make default search request
} else {

    $sphinxResultsTotal = $sphinx->searchHostPagesTotal($sphinxKeyword);
    $sphinxResults      = $sphinx->searchHostPages($sphinxKeyword, $sphinxOffset, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal);
}

// Generate results
$dbResults = [];

foreach ($sphinxResults as $i => $sphinxResult) {

if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) {
// Image
if (!empty($type) && $type == 'image') {

if ($hostImage = $db->getFoundHostImage($sphinxResult->id)) {

$dbResults[$i] = $hostImage;

$dbResults[$i]->weight = $sphinxResult->weight;
}

// Default
} else {

if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) {

$dbResults[$i] = $hostPage;
$dbResults[$i] = $hostPage;

$dbResults[$i]->weight = $sphinxResult->weight;
$dbResults[$i]->weight = $sphinxResult->weight;
}
}
}

Expand Down
91 changes: 85 additions & 6 deletions public/search.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
]);

// Filter request data
// Only plain string parameters are accepted: array-style input such as
// ?q[]=x falls back to the default instead of reaching the filter.
$t = (!empty($_GET['t']) && is_string($_GET['t'])) ? Filter::url($_GET['t']) : 'page';
$m = (!empty($_GET['m']) && is_string($_GET['m'])) ? Filter::url($_GET['m']) : 'default';
$q = (!empty($_GET['q']) && is_string($_GET['q'])) ? Filter::url($_GET['q']) : '';

// Pages are 1-based; zero or negative values would turn into a negative
// result offset in the search request, so they are raised to the first page.
$p = !empty($_GET['p']) ? max(1, (int) $_GET['p']) : 1;
Expand Down Expand Up @@ -107,8 +108,16 @@
// Search request
if (!empty($q)) {

$resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m));
$results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal);
if (!empty($t) && $t == 'image') {

$resultsTotal = $sphinx->searchHostImagesTotal(Filter::searchQuery($q, $m));
$results = $sphinx->searchHostImages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal);

} else {

$resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m));
$results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal);
}

} else {

Expand Down Expand Up @@ -177,6 +186,14 @@
color: #fff;
}

/* Third-level heading: 16px, normal weight, white text. Used in the image
   results for the title of each page that references the image. */
h3 {
display: block;
font-size: 16px;
font-weight: normal;
margin: 8px 0;
color: #fff;
}

form {
display: block;
max-width: 678px;
Expand Down Expand Up @@ -208,6 +225,19 @@
color: #090808
}

/* Label of the search form (wraps the "Images" checkbox): pinned to the
   viewport at 30px from the top and 120px from the right edge. */
label {
font-size: 14px;
position: fixed;
top: 30px;
right: 120px;
color: #fff
}

/* Checkbox inside the label: natural width with a small horizontal gap. */
label > input {
width: auto;
margin: 0 4px;
}

button {
padding: 12px 16px;
border-radius: 4px;
Expand Down Expand Up @@ -235,12 +265,17 @@
color: #54a3f7;
}

img {
/* Round icon (used for the host favicon) floated to the left of the link text. */
img.icon {
float: left;
border-radius: 50%;
margin-right: 8px;
}

/* Image search result: never wider than its container, slightly rounded corners. */
img.image {
max-width: 100%;
border-radius: 3px;
}

div {
max-width: 640px;
margin: 0 auto;
Expand All @@ -262,6 +297,7 @@
<?php /* Search form: query field, "Images" type switch (kept checked while t=image) and submit button */ ?>
<form name="search" method="GET" action="<?= WEBSITE_DOMAIN ?>/search.php">
<h1><a href="<?= WEBSITE_DOMAIN ?>"><?= _('YGGo!') ?></a></h1>
<input type="text" name="q" placeholder="<?= $placeholder ?>" value="<?= htmlentities($q) ?>" />
<label><input type="checkbox" name="t" value="image" <?= (!empty($t) && $t == 'image') ? 'checked="checked"' : '' ?>/> <?= _('Images') ?></label>
<button type="submit"><?= _('Search') ?></button>
</form>
</header>
Expand All @@ -274,15 +310,58 @@
<?php } ?>
</div>
<?php foreach ($results as $result) { ?>
<?php if ($hostPage = $db->getFoundHostPage($result->id)) { ?>
<?php $hostPageURL = $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>
<?php if (!empty($t) && $t == 'image' &&
$hostImage = $db->getFoundHostImage($result->id)) { ?>
<?php

// Build the absolute image URL from the stored host parts
$hostImageURL = $hostImage->scheme . '://' .
                $hostImage->name .
                ($hostImage->port ? ':' . $hostImage->port : false) .
                $hostImage->uri;

// Derive the image subtype from the extension of the URL *path* only.
// Running pathinfo() on the whole URL also picked up the query string
// ("png?x=1") or, for extension-less paths, the host TLD, and the raw value
// ended up inside an HTML attribute. Only characters valid in a MIME subtype
// are kept.
$hostImageType = strtolower((string) pathinfo((string) parse_url($hostImageURL, PHP_URL_PATH), PATHINFO_EXTENSION));
$hostImageType = (string) preg_replace('/[^a-z0-9.+-]/', '', $hostImageType);

// Map common file extensions to their registered image/* subtype
$hostImageTypeAliases = [
    'jpg' => 'jpeg',
    'jpe' => 'jpeg',
    'svg' => 'svg+xml',
    'ico' => 'x-icon',
];

$hostImageType = $hostImageTypeAliases[$hostImageType] ?? $hostImageType;

if ($hostImageType === '') continue;

// Convert remote image to base64 string for the privacy reasons.
// Fetching is best-effort: warnings are suppressed on purpose so that an
// unreachable image is skipped instead of leaking a warning into the page.
$hostImageData = @file_get_contents($hostImageURL);

if ($hostImageData === false || $hostImageData === '') continue;

$hostImageURLencoded = 'data:image/' . $hostImageType . ';base64,' . base64_encode($hostImageData);

?>
<?php
// Image result card: the proxied image linking to its origin, followed by
// every indexed page that references it. All values originate from crawled
// remote content, so each one is HTML-escaped before output. Text fields use
// double_encode = false so entities already present in stored titles or
// descriptions are not encoded a second time.
?>
<div>
  <a href="<?php echo htmlspecialchars((string) $hostImageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>">
    <img src="<?php echo htmlspecialchars((string) $hostImageURLencoded, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>" alt="<?php echo htmlspecialchars((string) $hostImage->description, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) ?>" title="<?php echo htmlspecialchars((string) $hostImageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>" class="image" />
  </a>
  <?php foreach ((array) $db->getHostImageHostPages($result->id) as $hostImageHostPage) { ?>
    <?php if ($hostPage = $db->getFoundHostPage($hostImageHostPage->hostPageId)) { ?>
      <?php $hostPageURL = $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>
      <h3><?php echo htmlspecialchars((string) $hostPage->metaTitle, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) ?></h3>
      <?php if (!empty($hostImage->description)) { ?>
        <span><?php echo htmlspecialchars((string) $hostImage->description, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) ?></span>
      <?php } ?>
      <a href="<?php echo htmlspecialchars($hostPageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>">
        <img src="<?php echo WEBSITE_DOMAIN ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
        <?php echo htmlspecialchars($hostPageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>
      </a>
    <?php } ?>
  <?php } ?>
</div>
<?php } else if ((empty($t) || $t != 'image') && ($hostPage = $db->getFoundHostPage($result->id))) { ?>
<?php

// The page lookup is limited to non-image searches: in image mode $result->id
// is an image id, and falling through to this branch after a failed image
// lookup would render an unrelated page that happens to share the same id.

// Build the absolute page URL from the stored host parts
$hostPageURL = $hostPage->scheme . '://' .
               $hostPage->name .
               ($hostPage->port ? ':' . $hostPage->port : false) .
               $hostPage->uri;

?>
<div>
<?php
// Title, description and URL come from crawled remote pages, so they are
// HTML-escaped before output. Text fields use double_encode = false so
// entities already present in the stored text are not encoded twice.
?>
<h2><?php echo htmlspecialchars((string) $hostPage->metaTitle, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) ?></h2>
<?php if (!empty($hostPage->metaDescription)) { ?>
<span><?php echo htmlspecialchars((string) $hostPage->metaDescription, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) ?></span>
<?php } ?>
<a href="<?php echo htmlspecialchars($hostPageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" />
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
<?php /* The URL is built from crawled data, so it is escaped before being shown as link text */ ?>
<?php echo htmlspecialchars($hostPageURL, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>
</a>
</div>
Expand Down

0 comments on commit 6b18202

Please sign in to comment.