From adc43e528725bb86d19e8d27bddb408bbbf45643 Mon Sep 17 00:00:00 2001 From: Jorge Morgado Vega Date: Tue, 29 Aug 2023 11:57:47 -0400 Subject: [PATCH] Update paper --- paper/paper.bib | 26 +++++++++++++++++ paper/paper.md | 77 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 87 insertions(+), 16 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index cce9bee..efc2bf4 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -144,3 +144,29 @@ @article{li2022two doi = {10.3390/app122110979}, publisher = {Multidisciplinary Digital Publishing Institute} } + +@article{shenk2021traja, + title = {Traja: A Python toolbox for animal trajectory analysis}, + author = {Shenk, Justin and Byttner, Wolf and Nambusubramaniyan, Saranraj and Zoeller, Alexander}, + journal = {Journal of Open Source Software}, + volume = {6}, + number = {63}, + pages = {3202}, + year = {2021}, + doi = {10.21105/joss.03202}, + url = {https://doi.org/10.21105/joss.03202}, + publisher = {The Open Journal} +} + +@article{joo2020navigating, + title={Navigating through the R packages for movement}, + author={Joo, Rocio and Boone, Matthew E and Clay, Thomas A and Patrick, Samantha C and Clusella-Trullas, Susana and Basille, Mathieu}, + journal={Journal of Animal Ecology}, + volume={89}, + number={1}, + pages={248--267}, + year={2020}, + doi = {10.1111/1365-2656.13116}, + url = {https://doi.org/10.1111/1365-2656.13116}, + publisher={Wiley Online Library} +} \ No newline at end of file diff --git a/paper/paper.md b/paper/paper.md index ab0dc66..21b49a4 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -19,7 +19,7 @@ authors: orcid: 0000-0001-7305-4710 equal-contrib: true affiliation: 3 - - name: E. Altshuler + - name: E.~Altshuler orcid: 0000-0003-4192-5635 equal-contrib: false affiliation: 3 @@ -55,7 +55,8 @@ been studied in many different fields, including robotics, behavior analysis, mobility pattern mining, and user activity recognition [@da2019survey]. 
This task presents multiple challenges for conventional classification models, such as the indeterminate length of trajectories [@li2022two], the range of entities that generate -trajectories [@janczura2020classification; @xiao2017identifying; @bae2022transformer], and the absence of established standards in trajectory datasets +trajectories [@janczura2020classification; @xiao2017identifying; @bae2022transformer], +and the absence of established standards in trajectory datasets [@xiao2017identifying; @bae2022transformer]. Our study endeavors to lay the foundation for the assessment of innovative @@ -64,10 +65,26 @@ a new framework, referred to as `pactus`, which addresses the challenges of trajectory classification by providing direct access to a carefully chosen collection of datasets and several trajectory classifiers. `pactus` facilitates researchers' ability to experiment with various approaches -and assess their performance on different types of data. +and assess their performance on different types of data. Comprehensive software +documentation is provided at +[pactus.readthedocs.io](https://pactus.readthedocs.io/en/latest/). # Statement of need +In recent years, several software libraries have emerged, aiming to automate trajectory +data analysis. Within the R community, there are various available tools +[@joo2020navigating]. Recognizing the popularity and extensive usage of Python, the `traja` +software [@shenk2021traja] was developed to integrate different analysis techniques for +two-dimensional trajectories, primarily focusing on animal behavioral analysis. Additionally, +the `yupi` library [@reyes2023yupi] was created to handle trajectory analysis for applications +involving an arbitrary number of dimensions. + +Although these libraries offer valuable tools for trajectory classification, such as +classification models and feature extraction from trajectories, they were not specifically +designed for this task. 
Consequently, contemporary research on trajectory classification +faces limitations in terms of evaluation, often considering only a limited number of datasets +or reporting only a reduced set of metrics [@bae2022transformer]. + The lack of standardization in trajectory datasets, coupled with the difficulty of obtaining these datasets for evaluation, poses a significant challenge to researchers working in fields related to trajectory classification. Moreover, @@ -88,18 +105,18 @@ researchers to distribute their findings as simple Python scripts, relying on `p for all tasks related to data acquisition, processing, and model evaluation. -# Software Overview - -The functionalities of `pactus` can be divided into four different categories as shown in -\autoref{fig:overview}. +# Pactus Software Library -![Overview of the resources available in `pactus` coupled with an usage example.\label{fig:overview}](1.pdf) +The functionalities of `pactus` can be divided into modules: Data handling, Feature extraction, +Classification models and Evaluation. +## Data handling -The selection of datasets was conducted with meticulous care to encompass a broad +The library provides direct access to some of the most commonly used datasets for trajectory +classification. The selection of datasets was conducted with meticulous care to encompass a broad range of trajectories and classification objectives. 
Our initial selection includes GeoLife [@zheng2009mining; @zheng2008understanding; @zheng2010geolife], The Starkey -Project dataset, also known as `Animals' in the trajectory classification +Project dataset, also known as `Animals` in the trajectory classification community [@rapp2009elk], four different datasets from the the UCI repository [@Dua:2019] and two different hurricane datasets, provided by National Hurricane Center [@landsea2013atlantic] and the China Meteorological Administration @@ -107,19 +124,47 @@ Center [@landsea2013atlantic] and the China Meteorological Administration datasets were transformed into a standardized format utilizing the trajectory data structures proposed in [@reyes2023yupi]. Datasets are not bundled with the software package, but rather will be downloaded and cached automatically upon each -individual access through the library. +individual access through the library. A complete guide on how to use custom datasets or +request the inclusion of new datasets into `pactus` can be found in the documentation. + +## Feature extraction In order to mitigate the different-length trajectories on some datasets, `pactus` is able to extract statistical features from any trajectory and convert an arbitrary length trajectory into a fixed size vector whose components are engineered features typically used in the literature [@xiao2017identifying; @zheng2008understanding]. -Finally, several classification algorithms can be evaluated on the vectorized -versions of the trajectories (e.g., Random Forest, SVM, KNN) or, alternatively, -classifiers able to handle variable-size inputs (e.g., LSTM or Transformers [@bae2022transformer]) can be evaluated directly on the trajectory data. -In both cases, typical evaluation metrics for classification are computed -automatically for the model being evaluated. +Users can implement their own method to perform this conversion, and an example of how +to do it can be found in the documentation. 
However, there is a default method that uses +all the features computed by the `yupi` library. + +## Classification models and Evaluation + +Several classification algorithms are included in `pactus`. Some of them can be evaluated +on the vectorized versions of the trajectories (e.g., Random Forest, SVM, KNN). In other cases +the classifiers are able to handle variable-size inputs (e.g., LSTM or Transformers +[@bae2022transformer]) and can be evaluated directly on the trajectory data. In both cases, +typical evaluation metrics for classification are computed automatically for the model being evaluated. + +## Overview + +All the functionalities of the library can be integrated in a single script. \autoref{fig:overview} +shows an example of how to use `pactus` for training and evaluating a Random Forest model using the +Starkey Project dataset, also known as `Animals`. + +![Overview of the resources available in `pactus` coupled with a usage example.\label{fig:overview}](1.pdf) + +# Conclusions +The software presented with this work, `pactus`, addresses typical challenges faced in trajectory +classification research. By providing researchers with direct access to curated datasets and trajectory +classifiers, `pactus` enhances the availability of resources for evaluation. It is conceived with extensibility +in mind, encouraging researchers to contribute their own datasets and methods. The evaluation methodology ensures +reproducibility and comparability of results, facilitating the identification of effective trajectory classification +methods for specific scenarios. Additionally, `pactus` promotes reproducible research by enabling researchers to +distribute their findings as Python scripts, relying on `pactus` for data acquisition, processing, and model +evaluation. Overall, `pactus` offers a valuable tool for researchers in the field of trajectory classification, +addressing key challenges and facilitating future advancements in the field. # References