Skip to content

Commit

Permalink
bugfix: skip first page of rendered images
Browse files: browse the repository at this point in the history
  • Loading branch information
densumesh committed Nov 20, 2024
1 parent 0e520f4 commit da7d930
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions pdf2md/server/src/workers/supervisor-worker.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -127,9 +127,20 @@ pub async fn chunk_pdf(

let pages = pdf
.render(pdf2image::Pages::All, None)
.map_err(|err| ServiceError::BadRequest(format!("Failed to render PDF file {:?}", err)))?;
.map_err(|err| ServiceError::BadRequest(format!("Failed to render PDF file {:?}", err)))?
.into_iter()
.skip(1)
.collect::<Vec<_>>();

let num_pages = pages.len();

update_task_status(
task.id,
FileTaskStatus::ProcessingFile(num_pages as u32),
&clickhouse_client,
)
.await?;

// Process each chunk
for (i, page) in pages.into_iter().enumerate() {
let file_name = format!("{}page{}.jpeg", task.id, i + 1);
Expand Down Expand Up @@ -165,12 +176,5 @@ pub async fn chunk_pdf(
log::info!("Uploaded page {} of {} to S3", i + 1, num_pages);
}

update_task_status(
task.id,
FileTaskStatus::ProcessingFile(num_pages as u32),
&clickhouse_client,
)
.await?;

Ok(())
}

0 comments on commit da7d930

Please sign in to comment.