diff --git a/README.md b/README.md index 9b1ebe1..147257c 100644 --- a/README.md +++ b/README.md @@ -222,13 +222,15 @@ This is why the API to look up event info and question texts/authors is separated from looking up vote counts -- the former can have much longer cache time. -To allow querying questions for a given event and receive them in sorted -order, `questions` also has a [global secondary index] called `top` -whose partition key is the event ULID and sort key `votes`. That index -also projects out the "answered" and "hidden" fields so that a single -query to that index gives all the mutable state for an event's question -list (and can thus be queried with a single DynamoDB call by the -Lambda). +To allow querying questions for a given event, `questions` also has a +[global secondary index] called `top` whose partition key is the event +ULID. We don't use a sort key, since we want to sort by a [more complex +function]. That index also projects out the "answered" and "hidden" +fields so that a single query to that index gives all the mutable state +for an event's question list (and can thus be queried with a single +DynamoDB call by the Lambda). + +[more complex function]: https://www.evanmiller.org/ranking-news-items-with-upvotes.html **Metrics and Logging.** diff --git a/client/src/List.svelte b/client/src/List.svelte index 09a247c..e30494e 100644 --- a/client/src/List.svelte +++ b/client/src/List.svelte @@ -129,6 +129,7 @@ la.newQuestions = la.newQuestions.filter((qid) => !(qid in nowPresent)); for (const newQ of la.newQuestions) { console.info("add in", newQ); + // new questions always go at the bottom qs.push({ qid: newQ, hidden: false, @@ -188,9 +189,6 @@ } } } - qs.sort((a, b) => { - return b.votes - a.votes; - }); return qs; } diff --git a/client/src/store.js b/client/src/store.js index c6f1257..8b5f43c 100644 --- a/client/src/store.js +++ b/client/src/store.js @@ -125,10 +125,10 @@ export async function questionData(qid, qs) { // that's small-ish. // // ref https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchGetItem.html + qids.sort(); if (qids.length > 25) { qids = qids.slice(0, 25); } - qids.sort(); let arg = qids.join(","); // and go! // TODO: handle failure diff --git a/infra/dynamodb.tf b/infra/dynamodb.tf index 235ad1e..cb796c8 100644 --- a/infra/dynamodb.tf +++ b/infra/dynamodb.tf @@ -29,11 +29,6 @@ resource "aws_dynamodb_table" "questions" { type = "S" } - attribute { - name = "votes" - type = "N" - } - ttl { attribute_name = "expire" enabled = true @@ -42,8 +37,7 @@ resource "aws_dynamodb_table" "questions" { global_secondary_index { name = "top" hash_key = "eid" - range_key = "votes" projection_type = "INCLUDE" - non_key_attributes = ["answered", "hidden"] + non_key_attributes = ["answered", "hidden", "votes"] } } diff --git a/server/run-migrations.sh b/server/run-migrations.sh index b35f5e4..8ca8fd7 100755 --- a/server/run-migrations.sh +++ b/server/run-migrations.sh @@ -26,25 +26,24 @@ aws dynamodb create-table \ --attribute-definitions AttributeName=id,AttributeType=S \ --key-schema AttributeName=id,KeyType=HASH \ --billing-mode PAY_PER_REQUEST \ - --endpoint-url ${ENDPOINT_URL} >/dev/null + --endpoint-url "${ENDPOINT_URL}" >/dev/null aws dynamodb update-time-to-live \ --table-name events \ --time-to-live-specification Enabled=true,AttributeName=expire \ - --endpoint-url ${ENDPOINT_URL} >/dev/null + --endpoint-url "${ENDPOINT_URL}" >/dev/null echo "🗒️ Creating 'questions' table and 🚄 GSI..." aws dynamodb create-table \ --table-name questions \ --attribute-definitions AttributeName=id,AttributeType=S \ AttributeName=eid,AttributeType=S \ - AttributeName=votes,AttributeType=N \ --key-schema AttributeName=id,KeyType=HASH \ - --global-secondary-indexes 'IndexName=top,KeySchema=[{AttributeName=eid,KeyType=HASH},{AttributeName=votes,KeyType=RANGE}],Projection={ProjectionType=INCLUDE,NonKeyAttributes=[answered,hidden]}' \ + --global-secondary-indexes 'IndexName=top,KeySchema=[{AttributeName=eid,KeyType=HASH}],Projection={ProjectionType=INCLUDE,NonKeyAttributes=[answered,hidden,votes]}' \ --billing-mode PAY_PER_REQUEST \ - --endpoint-url ${ENDPOINT_URL} >/dev/null + --endpoint-url "${ENDPOINT_URL}" >/dev/null aws dynamodb update-time-to-live \ --table-name questions \ --time-to-live-specification Enabled=true,AttributeName=expire \ - --endpoint-url ${ENDPOINT_URL} >/dev/null + --endpoint-url "${ENDPOINT_URL}" >/dev/null diff --git a/server/src/list.rs b/server/src/list.rs index 871af63..c04b621 100644 --- a/server/src/list.rs +++ b/server/src/list.rs @@ -13,7 +13,10 @@ use http::{ header::{self, HeaderName}, StatusCode, }; -use std::collections::HashMap; +use std::{ + collections::HashMap, + time::{Duration, SystemTime}, +}; use ulid::Ulid; #[allow(unused_imports)] @@ -66,15 +69,6 @@ impl Backend { let qs = questions_by_eid .get_mut(eid) .expect("list for non-existing event"); - qs.sort_unstable_by_key(|qid| { - std::cmp::Reverse( - questions[qid]["votes"] - .as_n() - .expect("votes is always set") - .parse::() - .expect("votes are always numbers"), - ) - }); Ok(QueryOutput::builder() .set_count(Some(qs.len() as i32)) @@ -193,7 +187,50 @@ async fn list_inner( match dynamo.list(&eid, has_secret).await { Ok(qs) => { trace!(%eid, n = %qs.count(), "listed questions"); - let questions: Vec<_> = qs.items().iter().filter_map(serialize_question).collect(); + let mut questions: Vec<_> = qs.items().iter().filter_map(serialize_question).collect(); + + // sort based on "hotness" of the question over time: + // https://www.evanmiller.org/ranking-news-items-with-upvotes.html + // the wrapper struct is needed because f64 doesn't impl Ord + #[derive(Debug)] + #[repr(transparent)] + struct Score(f64); + impl PartialEq for Score { + fn eq(&self, other: &Self) -> bool { + self.cmp(other).is_eq() + } + } + impl Eq for Score {} + impl PartialOrd for Score { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + impl Ord for Score { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.total_cmp(&other.0) + } + } + let now = SystemTime::now(); + let score = |q: &serde_json::Value| { + let dt_in_minutes_rounded_down = now + .duration_since( + q["qid"] + .as_str() + .expect("it's a ULID") + .parse::() + .expect("produced as ULID by us") + .datetime(), + ) + .unwrap_or(Duration::ZERO) + .as_secs() + / 60; + let dt = dt_in_minutes_rounded_down + 1; + let votes = q["votes"].as_u64().expect("votes is a number") as f64; + let exp = (-1. * dt as f64).exp(); + Score(exp * votes / (1. - exp)) + }; + questions.sort_by_cached_key(|q| std::cmp::Reverse(score(q))); let max_age = if has_secret { // hosts should be allowed to see more up-to-date views