Skip to content

Commit

Permalink
chore: collect no subpages files (#334)
Browse files Browse the repository at this point in the history
* chore: collect no subpages resources

* chore: add test files

* chore: fix test

* chore: fix test

* chore: update test
  • Loading branch information
appflowy authored Nov 2, 2024
1 parent 4175465 commit be6bb90
Show file tree
Hide file tree
Showing 6 changed files with 491 additions and 136 deletions.
2 changes: 1 addition & 1 deletion collab-document/src/importer/md_importer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ fn create_paragraph_block(document_data: &mut DocumentData, parent_id: &str) ->
paragraph_block_id
}

fn create_image_block(block_id: &str, url: String, parent_id: &str) -> Block {
pub fn create_image_block(block_id: &str, url: String, parent_id: &str) -> Block {
let mut data = BlockData::new();
data.insert(URL_FIELD.to_string(), url.into());
data.insert(IMAGE_TYPE_FIELD.to_string(), EXTERNAL_IMAGE_TYPE.into());
Expand Down
26 changes: 25 additions & 1 deletion collab-importer/src/notion/importer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,20 @@ impl NotionImporter {
.await
.unwrap_or_default();

let no_subpages = !has_subdirectories(&self.path, 1);
let notion_export = NotionExportContext {
csv_relation,
no_subpages,
};

let path = self.path.clone();
let host = self.host.clone();
let workspace_id = self.workspace_id.clone();
let pages = tokio::task::spawn_blocking(move || {
// Process entries and track whether we have spaces (directories) and pages (non-directories)
let mut notion_pages: Vec<NotionPage> = vec![];
for entry in walk_sub_dir(&path) {
if let Some(view) = process_entry(&host, &workspace_id, &entry, false, &csv_relation) {
if let Some(view) = process_entry(&host, &workspace_id, &entry, false, &notion_export) {
has_spaces |= view.is_dir;
has_pages |= !view.is_dir;
notion_pages.push(view);
Expand Down Expand Up @@ -309,6 +315,15 @@ async fn convert_notion_page_to_parent_child(
view_builder.build()
}

/// Export-wide information gathered once per import and passed by reference
/// to `process_entry` for every entry found under the export root.
#[derive(Debug, Clone)]
pub struct NotionExportContext {
    /// Relationships between the CSV files contained in the export
    /// (see [CSVRelation]).
    pub csv_relation: CSVRelation,
    /// `true` when the export root has no subdirectory within one level,
    /// i.e. `has_subdirectories(path, 1)` returned `false`.
    // NOTE(review): presumably used to decide how loose resource files are
    // collected for exports without subpages — confirm against `process_entry`.
    pub no_subpages: bool,
}

/// [CSVRelation] manages parent-child relationships between CSV files exported in zip format from Notion.
/// The zip export may contain multiple CSV files that represent views of the main *_all.csv file.
/// When a partial CSV file is encountered, it is replaced with the main *_all.csv file and directed to
/// reference the *_all.csv file using the specified ID.
#[derive(Default, Debug, Clone)]
pub struct CSVRelation {
inner: Arc<HashMap<String, PathBuf>>,
Expand Down Expand Up @@ -481,6 +496,15 @@ fn extract_file_name(input: &str) -> String {

normalized
}

/// Reports whether `path` contains at least one directory beneath it,
/// descending at most `max_depth` levels.
///
/// The root itself never counts, so `max_depth == 0` always yields `false`.
/// Entries that cannot be read while walking are skipped silently.
///
/// Accepts a borrowed `Path` so callers may pass `&PathBuf` (via deref
/// coercion) or any other path reference.
fn has_subdirectories(path: &std::path::Path, max_depth: usize) -> bool {
    WalkDir::new(path)
        .max_depth(max_depth)
        .into_iter()
        // Best effort: an unreadable entry should not abort the check.
        .filter_map(Result::ok)
        // Depth 0 is the root that was passed in; only descendants qualify.
        .any(|entry| entry.depth() > 0 && entry.file_type().is_dir())
}

#[cfg(test)]
mod test_csv_relation {
use super::*;
Expand Down
Loading

0 comments on commit be6bb90

Please sign in to comment.