Skip to content

Commit

Permalink
feature: completed MVP implementation of the UI
Browse files Browse the repository at this point in the history
  • Loading branch information
skeptrunedev authored and cdxker committed Nov 19, 2024
1 parent 215451c commit c8f93f3
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 119 deletions.
2 changes: 0 additions & 2 deletions pdf2md/server/src/operators/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ pub async fn get_task_pages(
if FileTaskStatus::from(task.status.clone()) == FileTaskStatus::Completed || task.pages > 0 {
let limit = limit.unwrap_or(20);

log::info!("offset id {:?}", offset_id);

let pages: Vec<ChunkClickhouse> = clickhouse_client
.query(
"SELECT ?fields FROM file_chunks WHERE task_id = ? AND id > ? ORDER BY id LIMIT ?",
Expand Down
2 changes: 1 addition & 1 deletion pdf2md/server/src/operators/pdf_chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ pub async fn chunk_sub_pages(
clickhouse_client: &clickhouse::Client,
redis_pool: &RedisPool,
) -> Result<Vec<ChunkClickhouse>, ServiceError> {
log::info!("Chunking pages for {:?} size {}", task.task_id, data.len());
log::info!("Chunking pages for {:?} size {}", task.id, data.len());
let pdf = PDF::from_bytes(data)
.map_err(|err| ServiceError::BadRequest(format!("Failed to open PDF file {:?}", err)))?;

Expand Down
18 changes: 14 additions & 4 deletions pdf2md/server/src/operators/s3.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use s3::{creds::Credentials, Bucket, Region};

use crate::{errors::ServiceError, get_env};
Expand Down Expand Up @@ -37,10 +39,18 @@ pub fn get_aws_bucket() -> Result<Bucket, ServiceError> {
}

/// Creates a presigned GET URL for `key` in `bucket`, valid for one hour.
///
/// The URL carries response-header overrides so that a browser renders the
/// object inline as a PDF instead of downloading it.
///
/// # Errors
///
/// Returns `ServiceError::BadRequest` when the bucket client fails to
/// presign the request; the underlying error is logged.
pub async fn get_signed_url(bucket: &Bucket, key: &str) -> Result<String, ServiceError> {
    // Only the disposition and content type are overridden. A
    // `response-content-encoding` of "utf-8" is intentionally not sent:
    // "utf-8" is a charset, not a content coding (gzip, br, ...), and an
    // unknown Content-Encoding can make clients refuse to decode the body.
    let custom_queries: HashMap<String, String> = HashMap::from([
        (
            "response-content-disposition".to_string(),
            "inline".to_string(),
        ),
        (
            "response-content-type".to_string(),
            "application/pdf".to_string(),
        ),
    ]);

    // Presign exactly once; the expiry is given in seconds.
    bucket
        .presign_get(key, 3600, Some(custom_queries))
        .await
        .map_err(|e| {
            log::error!("Could not get signed url {:?}", e);
            ServiceError::BadRequest("Could not get signed url".to_string())
        })
}
44 changes: 25 additions & 19 deletions pdf2md/server/src/templates/demo-ui.html
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
{% extends "skeleton.html" %} {% block scripts %}
<script src="/static/pdf2md.js" defer></script>
<script
src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.8.69/pdf.mjs"
type="module"
></script>
<script
src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.8.69/pdf_viewer.mjs"
defer
type="module"
></script>
<script src="/static/viewpdf.js" type="module" defer></script>
<script src="https://unpkg.com/pdfobject"></script>
<script src="https://cdn.jsdelivr.net/npm/notyf@3/notyf.min.js"></script>
<script>
PDFObject.embed("/path/to/file.pdf", "#my-pdf");
</script>
<link
rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.8.69/pdf_viewer.min.css"
integrity="sha512-qBj3yMdvzL7dOHWfvs21eTD0LURNR9Jhcy5ZMfR7E5NOKev5i9Iu49Yuijdm/or10JyenuaRuflq6DG/E04fcQ=="
crossorigin="anonymous"
referrerpolicy="no-referrer"
href="https://cdn.jsdelivr.net/npm/notyf@3/notyf.min.css"
/>
<style>
.pdfobject-container {
height: 75vh;
}

.task-status:not(.status-completed) {
animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
}
</style>
{% endblock %} {% block body %}
<div
id="upload-form-container"
Expand Down Expand Up @@ -72,12 +73,17 @@
</div>
</form>
</div>
<canvas
id="the-canvas"
style="border: 1px solid black; direction: ltr"
></canvas>
<div class="px-4">
<div
class="mt-10 sm:mt-14 md:mt-24 grid grid-cols-2 gap-4 max-w-7xl mx-auto border border-gray-900"
>
<div id="my-pdf"></div>
<div id="markdown-container" class="max-h-[75vh] overflow-y-auto"></div>
</div>
</div>
<div class="flow-root">
<div class="mt-10 sm:mt-14 md:mt-24 hidden"></div>
<div class="mt-10 sm:mt-14 md:mt-24 pt-4 hidden"></div>
<div class="my-4 animate-pulse hidden h-1 bg-gray-700"></div>
</div>
<div
id="task-status-table-container"
Expand Down
53 changes: 53 additions & 0 deletions pdf2md/server/static/output.css
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,11 @@ video {
margin-bottom: 3rem;
}

.my-4 {
margin-top: 1rem;
margin-bottom: 1rem;
}

.mt-10 {
margin-top: 2.5rem;
}
Expand Down Expand Up @@ -626,6 +631,10 @@ video {
display: flow-root;
}

.grid {
display: grid;
}

.hidden {
display: none;
}
Expand All @@ -635,6 +644,10 @@ video {
height: 3rem;
}

.h-1 {
height: 0.25rem;
}

.h-12 {
height: 3rem;
}
Expand All @@ -643,6 +656,10 @@ video {
height: 75vh;
}

.max-h-\[75vh\] {
max-height: 75vh;
}

.w-12 {
width: 3rem;
}
Expand All @@ -659,10 +676,24 @@ video {
max-width: 28rem;
}

@keyframes pulse {
50% {
opacity: .5;
}
}

.animate-pulse {
animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
}

.cursor-pointer {
cursor: pointer;
}

.grid-cols-2 {
grid-template-columns: repeat(2, minmax(0, 1fr));
}

.flex-wrap {
flex-wrap: wrap;
}
Expand All @@ -679,6 +710,10 @@ video {
justify-content: space-between;
}

.gap-4 {
gap: 1rem;
}

.gap-x-4 {
-moz-column-gap: 1rem;
column-gap: 1rem;
Expand Down Expand Up @@ -712,6 +747,10 @@ video {
overflow-x: auto;
}

.overflow-y-auto {
overflow-y: auto;
}

.whitespace-nowrap {
white-space: nowrap;
}
Expand Down Expand Up @@ -740,10 +779,20 @@ video {
border-style: dashed;
}

.border-gray-900 {
--tw-border-opacity: 1;
border-color: rgb(17 24 39 / var(--tw-border-opacity));
}

.border-gray-900\/25 {
border-color: rgb(17 24 39 / 0.25);
}

.bg-gray-700 {
--tw-bg-opacity: 1;
background-color: rgb(55 65 81 / var(--tw-bg-opacity));
}

.bg-white {
--tw-bg-opacity: 1;
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
Expand Down Expand Up @@ -817,6 +866,10 @@ video {
padding-right: 1rem;
}

.pt-4 {
padding-top: 1rem;
}

.text-left {
text-align: left;
}
Expand Down
115 changes: 108 additions & 7 deletions pdf2md/server/static/pdf2md.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ const defaultTableRowStr = `
const defaultTableRow = document.createElement("tr");
defaultTableRow.innerHTML = defaultTableRowStr;

var notyf = new Notyf();

const upsertTaskToStorage = (task) => {
let tasks = JSON.parse(localStorage.getItem("tasks")) || [];
if (tasks.find((t) => t.id === task.id)) {
Expand All @@ -40,6 +42,54 @@ const upsertTaskToStorage = (task) => {
updateTaskStatusTable();
};

/**
 * Renders a task in the side-by-side viewer: the source PDF is embedded into
 * `#my-pdf` and the converted pages are listed in `#markdown-container`.
 *
 * The container remembers what it currently shows through `data-task-*`
 * attributes, so repeated calls with an unchanged task are no-ops.
 *
 * @param {object} task Task object as returned by the task API. Reads `id`,
 *   `status`, `num_pages`, `file_url` and the optional `pages` array, whose
 *   entries provide `content` and `metadata.page`.
 */
const displayTask = (task) => {
  const markdownContainer = document.getElementById("markdown-container");
  if (!markdownContainer) {
    // The viewer markup is not on this page; nothing to render into.
    return;
  }

  // `pages` can be absent on a task that has not produced output yet, so fall
  // back to an empty list instead of throwing on `.sort` below.
  const pages = Array.isArray(task.pages) ? task.pages : [];
  // String() matches what setAttribute stores and, unlike `.toString()`, does
  // not throw when `num_pages` is missing.
  const numPages = String(task.num_pages);
  const loadedPageCount = String(pages.length);

  // The loaded page count is part of the comparison so that pages arriving
  // without a status or num_pages change still trigger a re-render.
  const alreadyDisplayed =
    markdownContainer.getAttribute("data-task-id") === task.id &&
    markdownContainer.getAttribute("data-task-status") === task.status &&
    markdownContainer.getAttribute("data-task-num-pages") === numPages &&
    markdownContainer.getAttribute("data-task-loaded-pages") ===
      loadedPageCount;
  if (alreadyDisplayed) {
    console.log("Task already displayed", task.id);
    return;
  }

  PDFObject.embed(task.file_url, "#my-pdf", {
    pdfOpenParams: {
      view: "FitH",
    },
  });

  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's `task.pages`.
  const sortedPages = [...pages].sort(
    (a, b) => a.metadata.page - b.metadata.page
  );

  while (markdownContainer.firstChild) {
    markdownContainer.removeChild(markdownContainer.firstChild);
  }

  markdownContainer.setAttribute("data-task-id", task.id);
  markdownContainer.setAttribute("data-task-status", task.status);
  markdownContainer.setAttribute("data-task-num-pages", numPages);
  markdownContainer.setAttribute("data-task-loaded-pages", loadedPageCount);

  sortedPages.forEach((page) => {
    const pageContainer = document.createElement("div");
    pageContainer.classList.add("page-container");
    pageContainer.innerText = page.content;
    markdownContainer.appendChild(pageContainer);

    // Thin horizontal rule separating consecutive pages.
    const spacerDiv = document.createElement("div");
    spacerDiv.classList.add("my-4", "h-1", "bg-gray-700");
    markdownContainer.appendChild(spacerDiv);
  });

  if (!sortedPages.length) {
    // No converted pages yet: show a pulsing placeholder message.
    const placeholder = document.createElement("div");
    placeholder.classList.add("page-container", "animate-pulse", "pt-4");
    placeholder.innerText =
      "Your file is being converted. We are pinging the server every 5 seconds to check for status updates. Please be patient!";
    markdownContainer.appendChild(placeholder);
  }
};

const fileUploadInput = document.getElementById("file-upload");

fileUploadInput.addEventListener("change", (event) => {
Expand Down Expand Up @@ -69,10 +119,24 @@ fileUploadInput.addEventListener("change", (event) => {
})
.then((response) => response.json())
.then((data) => {
notyf.success({
message:
"File uploaded! We are processing the file. Please wait. Scroll down to the table to view the status.",
dismissable: true,
position: { x: "center", y: "top" },
});

upsertTaskToStorage(data);
const url = new URL(window.location);
url.searchParams.set("taskId", data.id);
window.history.pushState({}, "", url);
})
.catch((error) => {
console.error("Error:", error);
notyf.error({
message: `Error uploading file. Please try again later. ${error}`,
dismissable: true,
position: { x: "center", y: "top" },
});
});
};

Expand All @@ -91,7 +155,10 @@ const updateTaskStatusTable = () => {
: defaultTableRow.cloneNode(true);
row.querySelector(".task-id").innerText = task.id;
row.querySelector(".task-file-name").innerText = task.file_name;
row.querySelector(".task-status").innerText = task.status;
row.querySelector(".task-status").innerText =
task.status.toLowerCase() === "completed"
? task.status
: `${task.status} | Please wait. Checking for updates every 5 seconds.`;
row
.querySelector(".task-status")
.classList.add(
Expand All @@ -102,11 +169,7 @@ const updateTaskStatusTable = () => {
url.searchParams.set("taskId", task.id);
window.history.pushState({}, "", url);

document.dispatchEvent(
new CustomEvent("open-pdf", {
detail: { pdfUrl: task.file_url },
})
);
displayTask(task);
});
return row;
});
Expand All @@ -126,6 +189,14 @@ updateTaskStatusTable();
const refreshTasks = () => {
const tasks = JSON.parse(localStorage.getItem("tasks")) || [];
tasks.forEach((task) => {
if (
task.status.toLowerCase() === "completed" &&
task.pages &&
task.pages.length
) {
return;
}

fetch(`/api/task/${task.id}`, {
headers: {
Authorization: window.TRIEVE_API_KEY,
Expand All @@ -139,6 +210,36 @@ const refreshTasks = () => {
console.error("Error:", error);
});
});

const url = new URL(window.location);
const taskId = url.searchParams.get("taskId");
tasks.forEach((task) => {
if (task.id === taskId) {
displayTask(task);
}
});
};

setInterval(refreshTasks, 5000);

/**
 * Loads the task named by the `taskId` query parameter of the current URL,
 * stores it via `upsertTaskToStorage` and shows it with `displayTask`.
 * Does nothing when the parameter is absent.
 */
const setActiveTaskFromUrl = () => {
  const taskId = new URL(window.location).searchParams.get("taskId");
  if (!taskId) {
    return;
  }

  // The id comes straight from the address bar, so encode it before placing
  // it in the request path.
  fetch(`/api/task/${encodeURIComponent(taskId)}`, {
    headers: {
      Authorization: window.TRIEVE_API_KEY,
    },
  })
    .then((response) => {
      // fetch only rejects on network failure; without this check an error
      // payload from a 4xx/5xx response would be stored and rendered as a task.
      if (!response.ok) {
        throw new Error(
          `Failed to load task ${taskId}: HTTP ${response.status}`
        );
      }
      return response.json();
    })
    .then((data) => {
      upsertTaskToStorage(data);
      displayTask(data);
    })
    .catch((error) => {
      console.error("Error:", error);
    });
};

setActiveTaskFromUrl();
Loading

0 comments on commit c8f93f3

Please sign in to comment.