Skip to content

Commit a1caa74

Browse files
committed
Docstore documentation
1 parent 36f5300 commit a1caa74

File tree

8 files changed

+121
-60
lines changed

8 files changed

+121
-60
lines changed

batch/cleandocstore.php

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class CleanDocstore_Batch {
3030
public $dry_run;
3131
/** @var bool */
3232
public $keep_temp;
33+
/** @var bool */
34+
public $only_temp;
3335
/** @var int */
3436
public $cutoff;
3537
/** @var DocumentHashMatcher */
@@ -49,10 +51,17 @@ function __construct(Conf $conf, $docstores, $arg) {
4951
$this->verbose = isset($arg["verbose"]);
5052
$this->dry_run = isset($arg["dry-run"]);
5153
$this->keep_temp = isset($arg["keep-temp"]);
54+
$this->only_temp = isset($arg["only-temp"]) || !$conf->s3_client();
5255
$this->cutoff = isset($arg["all"]) ? Conf::$now + 86400 : Conf::$now - 86400;
5356
$this->hash_matcher = new DocumentHashMatcher($arg["match"] ?? null);
5457
}
5558

59+
/** @param DocumentFileTreeMatch $fm
60+
* @return bool */
61+
static function is_temp($fm) {
62+
return $fm->tree->treeid === 1;
63+
}
64+
5665
/** @return ?DocumentFileTreeMatch */
5766
function fparts_random_match() {
5867
$fmatches = [];
@@ -66,7 +75,7 @@ function fparts_random_match() {
6675
++$j) {
6776
$fm = $ftree->random_match();
6877
if ($fm->is_complete()
69-
&& (($fm->treeid & 1) === 0
78+
&& (!self::is_temp($fm)
7079
|| max($fm->atime(), $fm->mtime()) < $this->cutoff)) {
7180
++$n;
7281
$fmatches[] = $fm;
@@ -75,7 +84,8 @@ function fparts_random_match() {
7584
}
7685
}
7786
if ($n === 0) {
78-
$this->ftrees[$i] = null;
87+
array_splice($this->ftrees, $i, 1);
88+
--$i;
7989
}
8090
}
8191
usort($fmatches, function ($a, $b) {
@@ -86,11 +96,11 @@ function fparts_random_match() {
8696
return $at ? -1 : ($bt ? 1 : 0);
8797
}
8898
$aage = Conf::$now - $at;
89-
if ($a->treeid & 1) {
99+
if (self::is_temp($a)) {
90100
$aage = $aage > 604800 ? 100000000 : $aage * 2;
91101
}
92102
$bage = Conf::$now - $bt;
93-
if ($b->treeid & 1) {
103+
if (self::is_temp($b)) {
94104
$bage = $bage > 604800 ? 100000000 : $bage * 2;
95105
}
96106
return $bage <=> $aage;
@@ -99,7 +109,7 @@ function fparts_random_match() {
99109
return null;
100110
} else {
101111
$fm = $fmatches[0];
102-
$this->ftrees[$fm->treeid]->hide($fm);
112+
$fm->tree->hide($fm);
103113
return $fm;
104114
}
105115
}
@@ -142,7 +152,7 @@ function run() {
142152
}
143153

144154
if (empty($this->docstores) || !$this->conf->docstore()) {
145-
throw new ErrorException("No docstore to clean");
155+
throw new CommandLineException("No docstore to clean");
146156
}
147157

148158
preg_match('/\A((?:\/[^\/%]*(?=\/|\z))+)/', $this->docstores[0], $m);
@@ -153,7 +163,7 @@ function run() {
153163
$ts = disk_total_space($usage_directory);
154164
$fs = disk_free_space($usage_directory);
155165
if ($ts === false || $fs === false) {
156-
throw new ErrorException("{$usage_directory}: Cannot evaluate free space");
166+
throw new CommandLineException("{$usage_directory}: Cannot evaluate free space");
157167
} else if ($fs >= $ts * (1 - ($this->max_usage ?? $this->min_usage))) {
158168
if (!$this->quiet) {
159169
fwrite(STDOUT, "{$usage_directory}: free space sufficient\n");
@@ -170,13 +180,13 @@ function run() {
170180

171181
foreach ($this->docstores as $i => $dp) {
172182
if (!str_starts_with($dp, "/") || strpos($dp, "%") === false) {
173-
throw new ErrorException("{$dp}: Bad docstore pattern");
183+
throw new CommandLineException("{$dp}: Bad docstore pattern");
184+
}
185+
if (!$this->only_temp) {
186+
$this->ftrees[] = new DocumentFileTree($dp, $this->hash_matcher, 0);
174187
}
175-
$this->ftrees[] = new DocumentFileTree($dp, $this->hash_matcher, count($this->ftrees));
176188
if (!$this->keep_temp) {
177-
$this->ftrees[] = new DocumentFileTree(Filer::docstore_fixed_prefix($dp) . "tmp/%w", $this->hash_matcher, count($this->ftrees));
178-
} else {
179-
$this->ftrees[] = null;
189+
$this->ftrees[] = new DocumentFileTree(Filer::docstore_fixed_prefix($dp) . "tmp/%w", $this->hash_matcher, 1);
180190
}
181191
}
182192

@@ -185,8 +195,7 @@ function run() {
185195
while ($count > 0
186196
&& ($usage_threshold === null || $bytesremoved < $usage_threshold)
187197
&& ($fm = $this->fparts_random_match())) {
188-
if (($fm->treeid & 1) !== 0
189-
|| $this->check_match($fm)) {
198+
if (self::is_temp($fm) || $this->check_match($fm)) {
190199
$size = filesize($fm->fname);
191200
if ($this->dry_run || unlink($fm->fname)) {
192201
if ($this->verbose) {
@@ -205,30 +214,31 @@ function run() {
205214
if (!$this->quiet) {
206215
fwrite(STDOUT, $usage_directory . ": " . ($this->dry_run ? "would remove " : "removed ") . plural($nsuccess, "file") . ", " . plural($bytesremoved, "byte") . "\n");
207216
}
208-
if ($nsuccess == 0) {
209-
fwrite(STDERR, "Nothing to delete\n");
217+
if ($nsuccess === 0 && !$this->quiet) {
218+
fwrite(STDERR, "Nothing to clean\n");
210219
}
211-
return $nsuccess && $nsuccess == $ndone ? 0 : 1;
220+
return $nsuccess > 0 && $nsuccess === $ndone ? 0 : 1;
212221
}
213222

214223
/** @return CleanDocstore_Batch */
215224
static function make_args($argv) {
216225
$arg = (new Getopt)->long(
217226
"name:,n: !",
218227
"config: !",
219-
"help,h",
220228
"count:,c: {n} =COUNT Clean up to COUNT files",
221229
"match:,m: =MATCH Clean files matching MATCH",
222-
"verbose,V",
223230
"dry-run,d Do not remove files",
224231
"max-usage:,u: {f} =FRAC Clean until usage is below FRAC",
225232
"min-usage:,U: {f} =FRAC Do not clean if usage is below FRAC",
226233
"all Clean all files, including files recently modified",
227-
"quiet,silent,q",
228-
"keep-temp",
229-
"docstore"
234+
"quiet,silent,q Be quiet",
235+
"keep-temp Keep temporary files",
236+
"only-temp Only clean temporary files",
237+
"help,h",
238+
"verbose,V Be more verbose",
239+
"docstore Output docstore patterns and exit"
230240
)->helpopt("help")
231-
->description("Remove files from HotCRP docstore that are on S3.
241+
->description("Remove old files from HotCRP docstore
232242
Usage: php batch/cleandocstore.php [-c COUNT|-u FRAC] [-V] [-d] [DOCSTORES...]\n")
233243
->parse($argv);
234244

devel/manual/docstore.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# HotCRP document store
2+
3+
HotCRP submission metadata are stored in the per-conference MySQL database. In
4+
default installations, document contents, such as PDF files, are also stored
5+
in the database. However, the system can also be configured to store document
6+
contents on the filesystem in a **document store** or **docstore**.
7+
8+
9+
## Configuring the docstore
10+
11+
Set up a document store by configuring `$Opt["docstore"]`. This setting is a
12+
filename pattern that sets where documents are stored on the filesystem.
13+
14+
To determine the filename for a document, HotCRP expands `%` escapes in
15+
`$Opt["docstore"]` using document information. The escapes are:
16+
17+
| Escape | Meaning | Examples |
18+
|:-------|:--------|:---------|
19+
| `%H` | Content hash | `d16c7976d9081368c7dca2da3a771065c3222069a1ad80dcd99d972b2efadc8b` |
20+
| `%NH` | First `N` characters of content hash | `d16` (for `%3H`) |
21+
| `%a` | Hash algorithm | `sha256`, `sha1` |
22+
| `%A` | Hash algorithm prefix | `sha2-` (for SHA-256), empty string (for SHA-1) |
23+
| `%h` | Content hash with algorithm prefix | `sha2-d16c7976d9081368c7dca2da3a771065c3222069a1ad80dcd99d972b2efadc8b` |
24+
| `%Nh` | Algorithm prefix followed by first `N` characters of content hash | `sha2-d16` (for `%3h`) |
25+
| `%x` | File extension | `.pdf`, `.txt` |
26+
| `%%` | Literal `%` | `%` |
27+
28+
A full `$Opt["docstore"]` setting must include a full hash (`%H` or `%h`). If
29+
`$Opt["docstore"]` does not include a `%` sign, then HotCRP automatically
30+
appends `/%h%x` to the setting value, and if `$Opt["docstore"]` is `true`,
31+
HotCRP uses `docs/%h%x`. Relative paths are interpreted relative to the HotCRP
32+
installation directory.
33+
34+
The HotCRP PHP server must have read and write permission to the document
35+
store. `php-fpm` and/or `httpd` typically own the docstore directory, or they
36+
have group access (and the docstore directory has set-group-id permission).
37+
HotCRP will create subdirectories as necessary; for instance, with docstore
38+
`"docs/%2h/%H%x"`, HotCRP might try to create the docstore subdirectory
39+
`docs/sha2-d1` to fit a file with SHA-256 hash
40+
`d16c7976d9081368c7dca2da3a771065c3222069a1ad80dcd99d972b2efadc8b`.
41+
42+
43+
## Temporary docstore
44+
45+
A special subdirectory of the docstore is used for large temporary files,
46+
especially files that may need to outlive a single request. Examples include
47+
chunks of uploaded documents and constructed ZIP archives and CSV files.
48+
49+
To form the temporary docstore, HotCRP appends `/tmp` to the docstore’s fixed
50+
prefix. For example, the docstore `"/home/hotcrp/docs/sub-%3h/%H%x"` has
51+
temporary docstore `/home/hotcrp/docs/tmp`.
52+
53+
The temporary docstore should be cleaned periodically, for instance by the
54+
batch script `php batch/cleandocstore.php`.
55+
56+
57+
## Docstore, database, and S3
58+
59+
HotCRP can store documents in the MySQL database (the default), in the
60+
docstore, and in Amazon S3. Amazon S3 is typically the slowest of these
61+
methods, but needs no separate backup. If you have configured either the
62+
docstore or S3, you can disable database storage by setting
63+
`$Opt["dbNoPapers"]` to `true`. If you have configured the docstore *and* S3,
64+
then the docstore can act as a cache for S3. Incoming documents are stored in
65+
both places; if a docstore version is missing later, HotCRP will check S3 for
66+
it.

devel/manual/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ configuring features inaccessible through the public settings UI.
99
* [**Components**](./components.md): How HotCRP configuration JSON files work
1010
* [**Message formatting and translation**](./fmt.md)
1111
* [**Page configuration**](./pages.md)
12+
* [**Document store**](./docstore.md)
1213

1314
## Specific pages
1415

devel/manual/sessions.md

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
# HotCRP session data
1+
# HotCRP sessions
22

3-
This page describes the format of HotCRP session data.
3+
Each HotCRP session contains both **global** data, relevant for all
4+
conferences attached to a session, and **conference** data relevant to a
5+
single conference. Session data is stored in PHP’s native [session
6+
encoding](https://www.php.net/manual/en/function.session-encode.php).
47

5-
HotCRP sessions contain both **global** data, relevant for all conferences
6-
attached to a session, and **conference** data, which is relevant to a single
7-
conference. Conferences are distinguished by their **session keys**, which are
8-
`@` followed by the conference’s database name.
98

10-
11-
## Global keys
9+
## Global session data
1210

1311
### Session version and expiration
1412

@@ -87,7 +85,7 @@ conference. Conferences are distinguished by their **session keys**, which are
8785
* `login_bounce`
8886

8987

90-
## Conference keys
88+
## Conference session data
9189

9290
Session data relevant to one conference is stored in the session element named
9391
by the conference’s session key, e.g., `@db-sigcomm23`. This element is

etc/distoptions.php

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,13 @@
115115
// docstore Set to true to serve papers and other downloads from a
116116
// cache on the local filesystem. By default this cache is
117117
// created in the "docs" directory. You can also set
118-
// $Opt["docstore"] to a directory name.
119-
// docstoreSubdir Set to true (or a small number, like 3) if the document
120-
// store should use subdirectories. This can be useful if
121-
// you expect thousands of submissions.
118+
// $Opt["docstore"] to a directory name, or to a directory
119+
// pattern such as "docs/%2H/%h%x".
122120
// s3_bucket Amazon S3 bucket name to store paper submissions.
123121
// s3_key Amazon AWS access key ID (used for S3).
124122
// s3_secret Amazon AWS secret access key (used for S3).
125123
// dbNoPapers Set to true to not store papers in the database.
126-
// Requires filestore, S3 storage, or both.
124+
// Requires docstore, S3 storage, or both.
127125

128126

129127
// TIMES AND DATES

src/api/api_tags.php

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,7 @@ static function run(Contact $user, $qreq, $prow) {
111111
}
112112

113113
$assigner = new AssignmentSet($user);
114-
if ($prow) {
115-
$assigner->enable_papers($prow);
116-
}
114+
$assigner->enable_papers($prow);
117115
$assigner->parse(join("\n", $x));
118116
$mlist = $assigner->message_list();
119117
$ok = $assigner->execute();
@@ -144,9 +142,8 @@ static function run(Contact $user, $qreq, $prow) {
144142
}
145143

146144
/** @param Qrequest $qreq
147-
* @param ?PaperInfo $prow
148145
* @return JsonResult */
149-
static function assigntags(Contact $user, $qreq, $prow) {
146+
static function assigntags(Contact $user, $qreq) {
150147
if (!isset($qreq->tagassignment)) {
151148
return JsonResult::make_missing_error("tagassignment");
152149
}
@@ -162,9 +159,6 @@ static function assigntags(Contact $user, $qreq, $prow) {
162159
}
163160

164161
$assigner = new AssignmentSet($user);
165-
if ($prow) {
166-
$assigner->enable_papers($prow);
167-
}
168162
$assigner->parse(join("\n", $x));
169163
$mlist = $assigner->message_list();
170164
$ok = $assigner->execute();

src/conference.php

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -674,13 +674,6 @@ function refresh_options() {
674674
$dpath = $docstore;
675675
} else if ($docstore === true) {
676676
$dpath = "docs";
677-
} else if ($docstore === null && isset($this->opt["filestore"])) {
678-
if (is_string($this->opt["filestore"])) {
679-
$dpath = $this->opt["filestore"];
680-
} else if ($this->opt["filestore"] === true) {
681-
$dpath = "filestore";
682-
}
683-
$dpsubdir = $this->opt["filestoreSubdir"] ?? null;
684677
}
685678
if ($dpath !== "") {
686679
if ($dpath[0] !== "/") {

src/documentfiletree.php

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ static function random_index($di) {
277277
/** @return DocumentFileTreeMatch */
278278
function first_match(?DocumentFileTreeMatch $after = null) {
279279
$this->clear();
280-
$fm = new DocumentFileTreeMatch($this->treeid);
280+
$fm = new DocumentFileTreeMatch($this);
281281
for ($i = 0; $i < $this->_n; ++$i) {
282282
if ($i % 2 === 0) {
283283
$fm->fname .= $this->_components[$i];
@@ -297,7 +297,7 @@ function first_match(?DocumentFileTreeMatch $after = null) {
297297
/** @return DocumentFileTreeMatch */
298298
function random_match() {
299299
$this->clear();
300-
$fm = new DocumentFileTreeMatch($this->treeid);
300+
$fm = new DocumentFileTreeMatch($this);
301301
for ($i = 0; $i < $this->_n; ++$i) {
302302
if ($i % 2 === 0) {
303303
$fm->fname .= $this->_components[$i];
@@ -316,7 +316,7 @@ function random_match() {
316316

317317
function hide(DocumentFileTreeMatch $fm) {
318318
// account for removal
319-
assert($fm->treeid === $this->treeid);
319+
assert($fm->tree === $this);
320320
for ($i = count($fm->idxes) - 1; $i >= 0; --$i) {
321321
$this->_dirinfo[$fm->bdirs[$i]]->hide_component_index($fm->idxes[$i]);
322322
}
@@ -342,8 +342,9 @@ function jsonSerialize() {
342342
}
343343

344344
class DocumentFileTreeMatch {
345-
/** @var int */
346-
public $treeid;
345+
/** @var DocumentFileTree
346+
* @readonly */
347+
public $tree;
347348
/** @var list<string> */
348349
public $bdirs = [];
349350
/** @var list<int> */
@@ -359,9 +360,9 @@ class DocumentFileTreeMatch {
359360
/** @var null|int|false */
360361
private $_mtime;
361362

362-
/** @param int $treeid */
363-
function __construct($treeid) {
364-
$this->treeid = $treeid;
363+
/** @param DocumentFileTree $tree */
364+
function __construct($tree) {
365+
$this->tree = $tree;
365366
}
366367
/** @param int $idx
367368
* @param string $suffix */

0 commit comments

Comments
 (0)