references.bib


% Chapter in an edited Springer volume. The citation key carries the first editor's surname rather than the first author's; it is kept unchanged so existing citations still resolve.
@incollection{federrath_apkcombiner:_2015,
	address = {Cham},
	title = {{ApkCombiner}: {Combining} {Multiple} {Android} {Apps} to {Support} {Inter}-{App} {Analysis}},
	series = {{IFIP} {Advances} in {Information} and {Communication} {Technology}},
	volume = {455},
	isbn = {978-3-319-18466-1 978-3-319-18467-8},
	shorttitle = {{ApkCombiner}},
	url = {http://link.springer.com/10.1007/978-3-319-18467-8_34},
	abstract = {Android apps are made of components which can leak information between one another using the ICC mechanism. With the growing momentum of Android, a number of research contributions have led to tools for the intra-app analysis of Android apps. Unfortunately, these state-of-the-art approaches, and the associated tools, have long left out the security flaws that arise across the boundaries of single apps, in the interaction between several apps. In this paper, we present a tool called ApkCombiner which aims at reducing an inter-app communication problem to an intra-app inter-component communication problem. In practice, ApkCombiner combines different apps into a single apk on which existing tools can indirectly perform inter-app analysis. We have evaluated ApkCombiner on a dataset of 3,000 real-world Android apps, to demonstrate its capability to support static context-aware inter-app analysis scenarios.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {{ICT} {Systems} {Security} and {Privacy} {Protection}},
	publisher = {Springer International Publishing},
	author = {Li, Li and Bartel, Alexandre and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves},
	editor = {Federrath, Hannes and Gollmann, Dieter},
	year = {2015},
	doi = {10.1007/978-3-319-18467-8_34},
	keywords = {static, characterization, leaks},
	pages = {513--527},
	file = {Li et al. - 2015 - ApkCombiner Combining Multiple Android Apps to Su.pdf:/home/fmind/Documents/Zotero/storage/M7C5CRE6/Li et al. - 2015 - ApkCombiner Combining Multiple Android Apps to Su.pdf:application/pdf}
}

% NOTE(review): the record names no journal, so it is typed as misc instead of article. The venue is not recorded here; confirm it and promote to a typed entry if known.
% The former pages value (2) was a page count, not a page range, so it is stored as pagetotal.
@misc{li_using_2015,
	title = {Using {An} {Instrumentation} based {Approach} to {Detect} {Inter}-{Component} {Leaks} in {Android} {Apps}},
	language = {en},
	author = {Li, Li and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves},
	month = mar,
	year = {2015},
	keywords = {detection, static, leaks},
	pagetotal = {2},
	file = {Li et al. - Using An Instrumentation based Approach to Detect .pdf:/home/fmind/Documents/Zotero/storage/ZVFKLBFW/Li et al. - Using An Instrumentation based Approach to Detect .pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1109/QRS.2015). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{li_potential_2015,
	title = {Potential {Component} {Leaks} in {Android} {Apps}: {An} {Investigation} into a {New} {Feature} {Set} for {Malware} {Detection}},
	isbn = {978-1-4673-7989-2},
	shorttitle = {Potential {Component} {Leaks} in {Android} {Apps}},
	url = {http://ieeexplore.ieee.org/document/7272932/},
	doi = {10.1109/QRS.2015.36},
	abstract = {We discuss the capability of a new feature set for malware detection based on potential component leaks (PCLs). PCLs are defined as sensitive data-flows that involve Android inter-component communications. We show that PCLs are common in Android apps and that malicious applications indeed manipulate significantly more PCLs than benign apps. Then, we evaluate a machine learning-based approach relying on PCLs. Experimental validations show high performance for identifying malware, demonstrating that PCLs can be used for discriminating malicious apps from benign apps.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {{IEEE} {International} {Conference} on {Software} {Quality}, {Reliability} and {Security} ({QRS})},
	publisher = {IEEE},
	author = {Li, Li and Allix, Kevin and Li, Daoyuan and Bartel, Alexandre and Bissyandé, Tegawendé F. and Klein, Jacques},
	month = aug,
	year = {2015},
	keywords = {detection, static, leaks},
	pages = {195--200},
	file = {Li et al. - 2015 - Potential Component Leaks in Android Apps An Inve.pdf:/home/fmind/Documents/Zotero/storage/2CLI4DWF/Li et al. - 2015 - Potential Component Leaks in Android Apps An Inve.pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1109/ICC.2014). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{jerome_using_2014,
	title = {Using opcode-sequences to detect malicious {Android} applications},
	isbn = {978-1-4799-2003-7},
	url = {http://ieeexplore.ieee.org/document/6883436/},
	doi = {10.1109/ICC.2014.6883436},
	abstract = {Recently, the Android platform has seen its number of malicious applications increased sharply. Motivated by the easy application submission process and the number of alternative market places for distributing Android applications, rogue authors are developing constantly new malicious programs. While current anti-virus software mainly relies on signature detection, the issue of alternative malware detection has to be addressed. In this paper, we present a feature based detection mechanism relying on opcode-sequences combined with machine learning techniques. We assess our tool on both a reference dataset known as Genome Project as well as on a wider sample of 40,000 applications retrieved from the Google Play Store.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {{IEEE} {International} {Conference} on {Communications} ({ICC})},
	publisher = {IEEE},
	author = {Jerome, Quentin and Allix, Kevin and State, Radu and Engel, Thomas},
	month = jun,
	year = {2014},
	keywords = {detection, static},
	pages = {914--919},
	file = {Jerome et al. - 2014 - Using opcode-sequences to detect malicious Android.pdf:/home/fmind/Documents/Zotero/storage/AJ77Z362/Jerome et al. - 2014 - Using opcode-sequences to detect malicious Android.pdf:application/pdf}
}

% Chapter in an edited Springer volume. The citation key carries the first editor's surname rather than the first author's; it is kept unchanged so existing citations still resolve.
@incollection{piessens_are_2015,
	address = {Cham},
	title = {Are {Your} {Training} {Datasets} {Yet} {Relevant}?},
	series = {Lecture {Notes} in {Computer} {Science}},
	volume = {8978},
	isbn = {978-3-319-15617-0 978-3-319-15618-7},
	url = {http://link.springer.com/10.1007/978-3-319-15618-7_5},
	abstract = {In this paper, we consider the relevance of timeline in the construction of datasets, to highlight its impact on the performance of a machine learning-based malware detection scheme. Typically, we show that simply picking a random set of known malware to train a malware detector, as it is done in many assessment scenarios from the literature, yields significantly biased results. In the process of assessing the extent of this impact through various experiments, we were also able to confirm a number of intuitive assumptions about Android malware. For instance, we discuss the existence of Android malware lineages and how they could impact the performance of malware detection in the wild.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Engineering {Secure} {Software} and {Systems}},
	publisher = {Springer International Publishing},
	author = {Allix, Kevin and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves},
	editor = {Piessens, Frank and Caballero, Juan and Bielova, Nataliia},
	year = {2015},
	doi = {10.1007/978-3-319-15618-7_5},
	keywords = {study},
	pages = {51--67},
	file = {Allix et al. - 2015 - Are Your Training Datasets Yet Relevant.pdf:/home/fmind/Documents/Zotero/storage/XJ9UGXG7/Allix et al. - 2015 - Are Your Training Datasets Yet Relevant.pdf:application/pdf}
}

% Journal article. issn lists two numbers, comma-separated, exactly as exported.
@article{allix_empirical_2016,
	title = {Empirical assessment of machine learning-based malware detectors for {Android}: {Measuring} the gap between in-the-lab and in-the-wild validation scenarios},
	volume = {21},
	issn = {1382-3256, 1573-7616},
	shorttitle = {Empirical assessment of machine learning-based malware detectors for {Android}},
	url = {http://link.springer.com/10.1007/s10664-014-9352-6},
	doi = {10.1007/s10664-014-9352-6},
	abstract = {To address the issue of malware detection through large sets of applications, researchers have recently started to investigate the capabilities of machine-learning techniques for proposing effective approaches. So far, several promising results were recorded in the literature, many approaches being assessed with what we call in the lab validation scenarios. This paper revisits the purpose of malware detection to discuss whether such in the lab validation scenarios provide reliable indications on the performance of malware detectors in real-world settings, aka in the wild.},
	language = {en},
	number = {1},
	urldate = {2018-04-10},
	journal = {Empirical Software Engineering},
	author = {Allix, Kevin and Bissyandé, Tegawendé F. and Jérome, Quentin and Klein, Jacques and State, Radu and Le Traon, Yves},
	month = feb,
	year = {2016},
	keywords = {study},
	pages = {183--211},
	file = {Allix et al. - 2016 - Empirical assessment of machine learning-based mal.pdf:/home/fmind/Documents/Zotero/storage/8VFQCJBF/Allix et al. - 2016 - Empirical assessment of machine learning-based mal.pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1109/ICSE.2015). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{li_iccta:_2015,
	title = {{IccTA}: {Detecting} {Inter}-{Component} {Privacy} {Leaks} in {Android} {Apps}},
	isbn = {978-1-4799-1934-5},
	shorttitle = {{IccTA}},
	url = {http://ieeexplore.ieee.org/document/7194581/},
	doi = {10.1109/ICSE.2015.48},
	abstract = {Shake Them All is a popular “Wallpaper” application exceeding millions of downloads on Google Play. At installation, this application is given permission to (1) access the Internet (for updating wallpapers) and (2) use the device microphone (to change background following noise changes). With these permissions, the application could silently record user conversations and upload them remotely. To give more confidence about how Shake Them All actually processes what it records, it is necessary to build a precise analysis tool that tracks the flow of any sensitive data from its source point to any sink, especially if those are in different components.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {37th {IEEE}/{ACM} {International} {Conference} on {Software} {Engineering} ({ICSE})},
	publisher = {IEEE},
	author = {Li, Li and Bartel, Alexandre and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves and Arzt, Steven and Rasthofer, Siegfried and Bodden, Eric and Octeau, Damien and McDaniel, Patrick},
	month = may,
	year = {2015},
	keywords = {detection, static, leaks},
	pages = {280--291},
	file = {Li et al. - 2015 - IccTA Detecting Inter-Component Privacy Leaks in .pdf:/home/fmind/Documents/Zotero/storage/FP337E2L/Li et al. - 2015 - IccTA Detecting Inter-Component Privacy Leaks in .pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1109/TrustCom.2014). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{li_automatically_2014,
	title = {Automatically {Exploiting} {Potential} {Component} {Leaks} in {Android} {Applications}},
	isbn = {978-1-4799-6513-7},
	url = {http://ieeexplore.ieee.org/document/7011274/},
	doi = {10.1109/TrustCom.2014.50},
	abstract = {We present PCLeaks, a tool based on intercomponent communication (ICC) vulnerabilities to perform dataflow analysis on Android applications to find potential component leaks that could potentially be exploited by other components. To evaluate our approach, we run PCLeaks on 2000 apps randomly selected from the Google Play store. PCLeaks reports 986 potential component leaks in 185 apps. For each leak reported by PCLeaks, PCLeaksValidator automatically generates an Android app which tries to exploit the leak. By manually running a subset of the generated apps, we find that 75\% of the reported leaks are exploitable leaks.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {13th {IEEE} {International} {Conference} on {Trust}, {Security} and {Privacy} in {Computing} and {Communications} ({TrustCom})},
	publisher = {IEEE},
	author = {Li, Li and Bartel, Alexandre and Klein, Jacques and Le Traon, Yves},
	month = sep,
	year = {2014},
	keywords = {leaks},
	pages = {388--397},
	file = {Li et al. - 2014 - Automatically Exploiting Potential Component Leaks.pdf:/home/fmind/Documents/Zotero/storage/4ZA23SCB/Li et al. - 2014 - Automatically Exploiting Potential Component Leaks.pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1109/COMPSAC.2014). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{allix_forensic_2014,
	title = {A {Forensic} {Analysis} of {Android} {Malware} -- {How} is {Malware} {Written} and {How} it {Could} {Be} {Detected}?},
	isbn = {978-1-4799-3575-8},
	url = {http://ieeexplore.ieee.org/document/6899240/},
	doi = {10.1109/COMPSAC.2014.61},
	abstract = {We consider in this paper the analysis of a large set of malware and benign applications from the Android ecosystem. Although a large body of research work has dealt with Android malware over the last years, none has addressed it from a forensic point of view.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {{IEEE} 38th {Annual} {Computer} {Software} and {Applications} {Conference} ({COMPSAC})},
	publisher = {IEEE},
	author = {Allix, Kevin and Jerome, Quentin and Bissyandé, Tegawendé F. and Klein, Jacques and State, Radu and Le Traon, Yves},
	month = jul,
	year = {2014},
	keywords = {study},
	pages = {384--393},
	file = {Allix et al. - 2014 - A Forensic Analysis of Android Malware -- How is M.pdf:/home/fmind/Documents/Zotero/storage/4QV3I6RA/Allix et al. - 2014 - A Forensic Analysis of Android Malware -- How is M.pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1145/2557547). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
% The title uses TeX quote ligatures instead of straight double quotes so that opening and closing quotes typeset correctly.
@inproceedings{allix_large-scale_2014,
	title = {Large-scale machine learning-based malware detection: confronting the ``10-fold cross validation'' scheme with reality},
	isbn = {978-1-4503-2278-2},
	shorttitle = {Large-scale machine learning-based malware detection},
	url = {http://dl.acm.org/citation.cfm?doid=2557547.2557587},
	doi = {10.1145/2557547.2557587},
	abstract = {To address the issue of malware detection, researchers have recently started to investigate the capabilities of machine-learning techniques for proposing effective approaches. Several promising results were recorded in the literature, many approaches being assessed with the common “10-Fold cross validation” scheme. This paper revisits the purpose of malware detection to discuss the adequacy of the “10-Fold” scheme for validating techniques that may not perform well in reality. To this end, we have devised several Machine Learning classifiers that rely on a novel set of features built from applications’ CFGs. We use a sizeable dataset of over 50,000 Android applications collected from sources where state-of-the-art approaches have selected their data. We show that our approach outperforms existing machine learning-based approaches. However, this high performance on usual-size datasets does not translate in high performance in the wild.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 4th {ACM} {Conference} on {Data} and {Application} {Security} and {Privacy} ({CODASPY})},
	publisher = {ACM},
	author = {Allix, Kevin and Bissyandé, Tegawendé F. and Jérome, Quentin and Klein, Jacques and State, Radu and Le Traon, Yves},
	year = {2014},
	keywords = {detection, design},
	pages = {163--166},
	file = {Allix et al. - 2014 - Large-scale machine learning-based malware detecti.pdf:/home/fmind/Documents/Zotero/storage/GLMC679F/Allix et al. - 2014 - Large-scale machine learning-based malware detecti.pdf:application/pdf}
}

% NOTE(review): the record names no journal, so it is typed as misc instead of article. Venue or institution is not recorded here, and the author list may be incomplete; confirm both against the PDF.
% The former pages value (17) was a page count, not a page range, so it is stored as pagetotal.
@misc{allix_machine_2014,
	title = {Machine {Learning}-{Based} {Malware} {Detection} for {Android} {Applications}: {History} {Matters}!},
	abstract = {Machine Learning-based malware detection is a promising scalable method for identifying suspicious applications. In particular, in today’s mobile computing realm where thousands of applications are daily poured into markets, such a technique could be valuable to guarantee a strong filtering of malicious apps. The success of machine-learning approaches however is highly dependent on (1) the quality of the datasets that are used for training and of (2) the appropriateness of the tested datasets with regards to the built classifiers. Unfortunately, there is scarce mention of these aspects in the evaluation of existing state-of-the-art approaches in the literature.},
	language = {en},
	author = {Allix, Kevin and Klein, Jacques},
	month = may,
	year = {2014},
	keywords = {detection, design},
	pagetotal = {17},
	file = {Allix and Klein - Machine Learning-Based Malware Detection for Andro.pdf:/home/fmind/Documents/Zotero/storage/K6UN2DFM/Allix and Klein - Machine Learning-Based Malware Detection for Andro.pdf:application/pdf}
}

% NOTE(review): the exported journal value was the first author's name (Li Li), not a venue, so it has been dropped and the entry typed as misc. Confirm the real venue and promote to a typed entry if known.
% The former pages value (6) was a page count, not a page range, so it is stored as pagetotal.
@misc{li_detecting_2014,
	title = {Detecting privacy leaks in {Android} {Apps}},
	abstract = {The number of Android apps have grown explosively in recent years and the number of apps leaking private data have also grown. It is necessary to make sure all the apps are not leaking private data before putting them to the app markets and thereby a privacy leaks detection tool is needed. We propose a static taint analysis approach which leverages the control-flow graph (CFG) of apps to detect privacy leaks among Android apps. We tackle three problems related to intercomponent communication (ICC), lifecycle of components and callback mechanism making the CFG imprecision. To bridge this gap, we explicitly connect the discontinuities of the CFG to provide a precise CFG. Based on the precise CFG, we aim at providing a taint analysis approach to detect intra-component privacy leaks, inter-component privacy leaks and also inter-app privacy leaks.},
	language = {en},
	author = {Li, Li and Bartel, Alexandre and Klein, Jacques},
	month = feb,
	year = {2014},
	keywords = {detection, leaks},
	pagetotal = {6},
	file = {Li et al. - Detecting privacy leaks in Android Apps.pdf:/home/fmind/Documents/Zotero/storage/PFHIPK2P/Li et al. - Detecting privacy leaks in Android Apps.pdf:application/pdf}
}

% NOTE(review): the record names no journal, so it is typed as misc instead of article. The venue is not recorded here; confirm it and promote to a typed entry if known.
% The former pages value (2) was a page count, not a page range, so it is stored as pagetotal.
@misc{li_using_2014,
	title = {Using {A} {Path} {Matching} {Algorithm} to {Detect} {Inter}-{Component} {Leaks} in {Android} {Apps}},
	language = {en},
	author = {Li, Li and Bartel, Alexandre and Klein, Jacques},
	month = mar,
	year = {2014},
	keywords = {detection, static, leaks},
	pagetotal = {2},
	file = {Li et al. - Using A Path Matching Algorithm to Detect Inter-Co.pdf:/home/fmind/Documents/Zotero/storage/P9BCESZ4/Li et al. - Using A Path Matching Algorithm to Detect Inter-Co.pdf:application/pdf}
}

% USENIX Security 2013 paper; the export had it as a journal-less article with a page count in pages and a truncated author list.
% NOTE(review): booktitle, page range and the two added authors come from the published proceedings record; confirm against the USENIX page.
@inproceedings{octeau_effective_2013,
	title = {Effective {Inter}-{Component} {Communication} {Mapping} in {Android} with {Epicc}: {An} {Essential} {Step} {Towards} {Holistic} {Security} {Analysis}},
	abstract = {Many threats present in smartphones are the result of interactions between application components, not just artifacts of single components. However, current techniques for identifying inter-application communication are ad hoc and do not scale to large numbers of applications. In this paper, we reduce the discovery of inter-component communication (ICC) in smartphones to an instance of the Interprocedural Distributive Environment (IDE) problem, and develop a sound static analysis technique targeted to the Android platform. We apply this analysis to 1,200 applications selected from the Play store and characterize the locations and substance of their ICC. Experiments show that full specifications for ICC can be identified for over 93\% of ICC locations for the applications studied. Further the analysis scales well; analysis of each application took on average 113 seconds to complete. Epicc, the resulting tool, finds ICC vulnerabilities with far fewer false positives than the next best tool. In this way, we develop a scalable vehicle to extend current security analysis to entire collections of applications as well as the interfaces they export.},
	language = {en},
	booktitle = {Proceedings of the 22nd {USENIX} {Security} {Symposium}},
	publisher = {USENIX Association},
	author = {Octeau, Damien and McDaniel, Patrick and Jha, Somesh and Bartel, Alexandre and Bodden, Eric and Klein, Jacques and Le Traon, Yves},
	month = aug,
	year = {2013},
	keywords = {characterization},
	pages = {543--558},
	file = {Octeau et al. - Effective Inter-Component Communication Mapping in.pdf:/home/fmind/Documents/Zotero/storage/U8752DQZ/Octeau et al. - Effective Inter-Component Communication Mapping in.pdf:application/pdf}
}

% Workshop paper (DOI prefix 10.1109/ICSTW.2011). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{bartel_model_2011,
	title = {Model {Driven} {Mutation} {Applied} to {Adaptative} {Systems} {Testing}},
	isbn = {978-1-4577-0019-4},
	url = {http://ieeexplore.ieee.org/document/5954440/},
	doi = {10.1109/ICSTW.2011.24},
	abstract = {Dynamically Adaptive Systems modify their behavior and structure in response to changes in their surrounding environment and according to an adaptation logic. Critical systems increasingly incorporate dynamic adaptation capabilities; examples include disaster relief and space exploration systems. In this paper, we focus on mutation testing of the adaptation logic. We propose a fault model for adaptation logics that classifies faults into environmental completeness and adaptation correctness. Since there are several adaptation logic languages relying on the same underlying concepts, the fault model is expressed independently from specific adaptation languages. Taking benefit from model-driven engineering technology, we express these common concepts in a metamodel and define the operational semantics of mutation operators at this level. Mutation is applied on model elements and model transformations are used to propagate these changes to a given adaptation policy in the chosen formalism. Preliminary results on an adaptive web server highlight the difficulty of killing mutants for adaptive systems, and thus the difficulty of generating efficient tests.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Fourth {IEEE} {International} {Conference} on {Software} {Testing}, {Verification} and {Validation} {Workshops} ({ICSTW})},
	publisher = {IEEE},
	author = {Bartel, Alexandre and Baudry, Benoit and Munoz, Freddy and Klein, Jacques and Mouelhi, Tejeddine and Le Traon, Yves},
	month = mar,
	year = {2011},
	pages = {408--413},
	file = {Bartel et al. - 2011 - Model Driven Mutation Applied to Adaptative System.pdf:/home/fmind/Documents/Zotero/storage/H7E48HUB/Bartel et al. - 2011 - Model Driven Mutation Applied to Adaptative System.pdf:application/pdf}
}

% Workshop paper (DOI prefix 10.1145/2259051). booktitle was missing from the export; NOTE(review): confirm the exact proceedings title against the DOI record.
@inproceedings{bartel_dexpler:_2012,
	title = {Dexpler: converting {Android} {Dalvik} bytecode to {Jimple} for static analysis with {Soot}},
	isbn = {978-1-4503-1490-9},
	shorttitle = {Dexpler},
	url = {http://dl.acm.org/citation.cfm?doid=2259051.2259056},
	doi = {10.1145/2259051.2259056},
	abstract = {This paper introduces Dexpler, a software package which converts Dalvik bytecode to Jimple. Dexpler is built on top of Dedexer and Soot. As Jimple is Soot’s main internal representation of code, the Dalvik bytecode can be manipulated with any Jimple based tool, for instance for performing point-to or flow analysis.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {ACM} {SIGPLAN} {International} {Workshop} on {State} of the {Art} in {Java} {Program} analysis ({SOAP})},
	publisher = {ACM},
	author = {Bartel, Alexandre and Klein, Jacques and Le Traon, Yves and Monperrus, Martin},
	year = {2012},
	keywords = {static, characterization},
	pages = {27--38},
	file = {Bartel et al. - 2012 - Dexpler converting Android Dalvik bytecode to Jim.pdf:/home/fmind/Documents/Zotero/storage/R8VCWFT4/Bartel et al. - 2012 - Dexpler converting Android Dalvik bytecode to Jim.pdf:application/pdf}
}

% Conference paper (DOI prefix 10.1145/2351676). booktitle was missing and pages held only the first page.
% NOTE(review): confirm the proceedings title and the end page (277) against the DOI record.
@inproceedings{bartel_automatically_2012,
	title = {Automatically {Securing} {Permission}-{Based} {Software} by {Reducing} the {Attack} {Surface}: {An} {Application} to {Android}},
	isbn = {978-1-4503-1204-2},
	shorttitle = {Automatically securing permission-based software by reducing the attack surface},
	url = {http://dl.acm.org/citation.cfm?doid=2351676.2351722},
	doi = {10.1145/2351676.2351722},
	abstract = {Android based devices are becoming widespread. As a result and since those devices contain personal and confidential data, the security model of the android software stack has been analyzed extensively. One key feature of the security model is that applications must declare a list of permissions they are using to access resources. Using static analysis, we first extracted a table from the Android API which maps methods to permissions. Then, we use this mapping within a tool we developed to check that applications effectively need all the permissions they declare. Using our tool on a set of android applications, we found out that a non negligible part of the applications do not use all the permissions they declare. Consequently, the attack surface of such applications can be reduced by removing the non-needed permissions.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 27th {IEEE}/{ACM} {International} {Conference} on {Automated} {Software} {Engineering} ({ASE})},
	publisher = {ACM},
	author = {Bartel, Alexandre and Klein, Jacques and Le Traon, Yves and Monperrus, Martin},
	year = {2012},
	keywords = {static, permissions},
	pages = {274--277},
	file = {Bartel et al. - 2012 - Automatically securing permission-based software b.pdf:/home/fmind/Documents/Zotero/storage/7SRW852B/Bartel et al. - 2012 - Automatically securing permission-based software b.pdf:application/pdf}
}

% NOTE(review): the record names no journal, so it is typed as misc instead of article. The venue is not recorded here; confirm it and promote to a typed entry if known.
% The former pages value (20) was a page count, not a page range, so it is stored as pagetotal. Glyphs dropped during PDF text extraction were restored in the abstract.
@misc{hurier_lack_2016,
	title = {On the {Lack} of {Consensus} in {Anti}-{Virus} {Decisions}: {Metrics} and {Insights} on {Building} {Ground} {Truths} of {Android} {Malware} with {VirusTotal}},
	copyright = {All rights reserved},
	abstract = {There is generally a lack of consensus in Antivirus (AV) engines' decisions on a given sample. This challenges the building of authoritative ground-truth datasets. Instead, researchers and practitioners may rely on unvalidated approaches to build their ground truth, e.g., by considering decisions from a selected set of Antivirus vendors or by setting up a threshold number of positive detections before classifying a sample. Both approaches are biased as they implicitly either decide on ranking AV products, or they consider that all AV decisions have equal weights. In this paper, we extensively investigate the lack of agreement among AV engines. To that end, we propose a set of metrics that quantitatively describe the different dimensions of this lack of consensus. We show how our metrics can bring important insights by using the detection results of 66 AV products on 2 million Android apps as a case study. Our analysis focuses not only on AV binary decision but also on the notoriously hard problem of labels that AVs associate with suspicious files, and allows to highlight biases hidden in the collection of a malware ground truth---a foundation stone of any machine learning-based malware detection approach.},
	language = {en},
	author = {Hurier, Médéric and Allix, Kevin and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves},
	month = jul,
	year = {2016},
	keywords = {vt-analysis},
	pagetotal = {20},
	file = {Hurier et al. - Metrics and Insights on Building Ground Truths of .pdf:/home/fmind/Documents/Zotero/storage/XG3AZ6XT/Hurier et al. - Metrics and Insights on Building Ground Truths of .pdf:application/pdf}
}

% Undated manuscript. The exported title was the PDF file name; the extension is removed.
% NOTE(review): the colon after Idea is presumed lost because file names cannot contain one; confirm against the PDF, and add a year once known.
@misc{hurier_idea_nodate,
	title = {Idea: {A} {Machine} {Apprenticeship} {Approach} for {Android} {Malware} {Analysis}},
	copyright = {All rights reserved},
	abstract = {Android malware are becoming more diverse and complex over the years. To manage their evolution, security researchers have proposed supervised learning models able to automatically train a system to perform pattern recognition tasks. In practice, these systems have shown promising results, but are not yet as transparent and adaptable as the analysis of human experts. They also suffer from several limitations that could limit their implementation in a production environment. In this paper, we propose to evaluate a technique from the field of Artificial Intelligence called Machine Apprenticeship that could address these short-comings. Instead of manually selected features, this approach promotes the use of expert demonstrations that guide the system through learning a complex task. This idea was successfully implemented in other fields, such in video games and robot locomotion, but is still unexplored in the security domain. Applied to malware analysis, this method could lead to autonomous systems more suited for anomaly detection operations.},
	author = {Hurier, Médéric},
	file = {Idea a Machine Apprenticeship Approach for Android Malware Analysis.pdf:/home/fmind/Documents/Zotero/storage/QE4NP4CP/Idea a Machine Apprenticeship Approach for Android Malware Analysis.pdf:application/pdf}
}

% arXiv preprint; the export had it as a journal-less article with an all-caps title and a page count in pages.
% NOTE(review): the eprint identifier and class are taken from the arXiv listing for this paper; confirm before relying on them.
@misc{abadi_learning_2016,
	title = {Learning to {Protect} {Communications} with {Adversarial} {Neural} {Cryptography}},
	abstract = {We ask whether neural networks can learn to use secret keys to protect information from other neural networks. Specifically, we focus on ensuring confidentiality properties in a multiagent system, and we specify those properties in terms of an adversary. Thus, a system may consist of neural networks named Alice and Bob, and we aim to limit what a third neural network named Eve learns from eavesdropping on the communication between Alice and Bob. We do not prescribe specific cryptographic algorithms to these neural networks; instead, we train end-to-end, adversarially. We demonstrate that the neural networks can learn how to perform forms of encryption and decryption, and also how to apply these operations selectively in order to meet confidentiality goals.},
	language = {en},
	author = {Abadi, Martín and Andersen, David G.},
	year = {2016},
	eprint = {1610.06918},
	archiveprefix = {arXiv},
	primaryclass = {cs.CR},
	pagetotal = {15},
	file = {Abadi and Andersen - 2016 - LEARNING TO PROTECT COMMUNICATIONS WITH ADVERSARIA.pdf:/home/fmind/Documents/Zotero/storage/2RWWK7AG/Abadi and Andersen - 2016 - LEARNING TO PROTECT COMMUNICATIONS WITH ADVERSARIA.pdf:application/pdf}
}

% The export listed only the second author; the attached file name shows Ben Moseley as well, so he is restored as first author. The citation key is kept unchanged so existing citations still resolve.
% NOTE(review): the record names no journal, so it is typed as misc; the former pages value (66) was a page count and is stored as pagetotal.
@misc{marks_out_2006,
	title = {Out of the {Tar} {Pit}},
	abstract = {Complexity is the single major difficulty in the successful development of large-scale software systems. Following Brooks we distinguish accidental from essential difficulty, but disagree with his premise that most complexity remaining in contemporary systems is essential. We identify common causes of complexity and discuss general approaches which can be taken to eliminate them where they are accidental in nature. To make things more concrete we then give an outline for a potential complexity-minimizing approach based on functional programming and Codd’s relational model of data.},
	language = {en},
	author = {Moseley, Ben and Marks, Peter},
	year = {2006},
	pagetotal = {66},
	file = {Marks - Ben Moseley ben@moseley.name.pdf:/home/fmind/Documents/Zotero/storage/ET9BD8Q6/Marks - Ben Moseley ben@moseley.name.pdf:application/pdf}
}

@article{brumley_theory_2008,
	title = {Theory and {Techniques} for {Automatic} {Generation} of {Vulnerability}-{Based} {Signatures}},
	volume = {5},
	issn = {1545-5971},
	url = {http://ieeexplore.ieee.org/document/4624274/},
	doi = {10.1109/TDSC.2008.55},
	abstract = {In this paper, we explore the problem of creating vulnerability signatures. A vulnerability signature is based on a program vulnerability and is not specific to any particular exploit. The advantage of vulnerability signatures is that their quality can be guaranteed. In particular, we create vulnerability signatures from the vulnerable program itself, such that they are guaranteed to have zero false positives by construction. We show how to automate signature creation for vulnerabilities that can be detected by a runtime monitor. There is no one right signature representation for a vulnerability. We introduce a formalism and way of thinking about vulnerability signature generation that is analysis centric instead of representation specific. In particular, a signature can be represented in many ways, from using regular expression to using a full Turing-complete language. Previous systems have mostly focused on a particular point in the design space. We show how to approximate the language of a vulnerability in many different language classes, each of which has unique properties and benefits, by performing analysis on the program binary and vulnerability. Our approach also considers multiple-path vulnerabilities. A multiple-path vulnerability is a vulnerability that can be exploited through several different code paths. For example, a Web server may have a vulnerability in a URL handling routine that is called for many different types of requests. We demonstrate techniques that can create signatures that cover multiple paths an exploit may take. We have had to develop new algorithms to cope with the problem where enumerating vulnerable paths leads to an exponential explosion. We develop a new approach that captures the logical semantics of multiple vulnerable program paths in $O(n^2)$ space (where $n$ is the size of the program) instead of exponential.
We provide a formal definition of a vulnerability signature and investigate the computational complexity of creating and matching vulnerability signatures. We systematically explore the design space of vulnerability signatures. We also provide specific techniques for creating vulnerability signatures in a variety of language classes. In order to demonstrate our techniques, we have built a prototype system. Our experiments show that we can, using a single exploit, automatically generate a vulnerability signature as a regular expression, as a small program, or as a system of constraints. We demonstrate techniques for creating signatures of vulnerabilities that can be exploited via multiple program paths. Our results indicate that our approach is a viable option for signature generation, especially when guarantees are desired.},
	language = {en},
	number = {4},
	urldate = {2018-04-10},
	journal = {IEEE Transactions on Dependable and Secure Computing},
	author = {Brumley, David and Newsome, James and Song, Dawn and Wang, Hao and Jha, Somesh},
	month = oct,
	year = {2008},
	pages = {224--241},
	file = {Brumley et al. - 2008 - Theory and Techniques for Automatic Generation of .pdf:/home/fmind/Documents/Zotero/storage/QXH7YAYJ/Brumley et al. - 2008 - Theory and Techniques for Automatic Generation of .pdf:application/pdf}
}

@inproceedings{yamaguchi_modeling_2014,
	title = {Modeling and {Discovering} {Vulnerabilities} with {Code} {Property} {Graphs}},
	isbn = {978-1-4799-4686-0},
	url = {http://ieeexplore.ieee.org/document/6956589/},
	doi = {10.1109/SP.2014.44},
	abstract = {The vast majority of security breaches encountered today are a direct result of insecure code. Consequently, the protection of computer systems critically depends on the rigorous identification of vulnerabilities in software, a tedious and error-prone process requiring significant expertise. Unfortunately, a single flaw suffices to undermine the security of a system and thus the sheer amount of code to audit plays into the attacker’s cards. In this paper, we present a method to effectively mine large amounts of source code for vulnerabilities. To this end, we introduce a novel representation of source code called a code property graph that merges concepts of classic program analysis, namely abstract syntax trees, control flow graphs and program dependence graphs, into a joint data structure. This comprehensive representation enables us to elegantly model templates for common vulnerabilities with graph traversals that, for instance, can identify buffer overflows, integer overflows, format string vulnerabilities, or memory disclosures. We implement our approach using a popular graph database and demonstrate its efficacy by identifying 18 previously unknown vulnerabilities in the source code of the Linux kernel.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2014 {IEEE} {Symposium} on {Security} and {Privacy}},
	publisher = {IEEE},
	author = {Yamaguchi, Fabian and Golde, Nico and Arp, Daniel and Rieck, Konrad},
	month = may,
	year = {2014},
	pages = {590--604},
	file = {Yamaguchi et al. - 2014 - Modeling and Discovering Vulnerabilities with Code.pdf:/home/fmind/Documents/Zotero/storage/GBR6WXKT/Yamaguchi et al. - 2014 - Modeling and Discovering Vulnerabilities with Code.pdf:application/pdf}
}

@article{wasserstein_asas_2016,
	title = {The {ASA}'s {Statement} on p-{Values}: {Context}, {Process}, and {Purpose}},
	volume = {70},
	issn = {0003-1305, 1537-2731},
	shorttitle = {The {ASA}'s {Statement} on p-{Values}},
	url = {https://www.tandfonline.com/doi/full/10.1080/00031305.2016.1154108},
	doi = {10.1080/00031305.2016.1154108},
	language = {en},
	number = {2},
	urldate = {2018-04-10},
	journal = {The American Statistician},
	author = {Wasserstein, Ronald L. and Lazar, Nicole A.},
	month = apr,
	year = {2016},
	pages = {129--133},
	file = {Wasserstein and Lazar - 2016 - The ASA's Statement on ipi -Values Context, .pdf:/home/fmind/Documents/Zotero/storage/7PD2YCF6/Wasserstein and Lazar - 2016 - The ASA's Statement on ipi -Values Context, .pdf:application/pdf}
}

@article{breiman_statistical_2001,
	title = {Statistical {Modeling}: {The} {Two} {Cultures}},
	volume = {16},
	doi = {10.1214/ss/1009213726},
	abstract = {There are two cultures in the use of statistical modeling to reach conclusions from data. One assumes that the data are generated by a given stochastic data model. The other uses algorithmic models and treats the data mechanism as unknown. The statistical community has been committed to the almost exclusive use of data models. This commitment has led to irrelevant theory, questionable conclusions, and has kept statisticians from working on a large range of interesting current problems. Algorithmic modeling, both in theory and practice, has developed rapidly in fields outside statistics. It can be used both on large complex data sets and as a more accurate and informative alternative to data modeling on smaller data sets. If our goal as a field is to use data to solve problems, then we need to move away from exclusive dependence on data models and adopt a more diverse set of tools.},
	language = {en},
	number = {3},
	journal = {Statistical Science},
	author = {Breiman, Leo},
	month = aug,
	year = {2001},
	pages = {199--231},
	file = {Breiman - Statistical Modeling The Two Cultures.pdf:/home/fmind/Documents/Zotero/storage/XESGGSFP/Breiman - Statistical Modeling The Two Cultures.pdf:application/pdf}
}

@inproceedings{arcuri_practical_2011,
	title = {A practical guide for using statistical tests to assess randomized algorithms in software engineering},
	isbn = {978-1-4503-0445-0},
	url = {http://portal.acm.org/citation.cfm?doid=1985793.1985795},
	doi = {10.1145/1985793.1985795},
	abstract = {Randomized algorithms have been used to successfully address many different types of software engineering problems. This type of algorithms employ a degree of randomness as part of their logic. Randomized algorithms are useful for difficult problems where a precise solution cannot be derived in a deterministic way within reasonable time. However, randomized algorithms produce different results on every run when applied to the same problem instance. It is hence important to assess the effectiveness of randomized algorithms by collecting data from a large enough number of runs. The use of rigorous statistical tests is then essential to provide support to the conclusions derived by analyzing such data. In this paper, we provide a systematic review of the use of randomized algorithms in selected software engineering venues in 2009. Its goal is not to perform a complete survey but to get a representative snapshot of current practice in software engineering research. We show that randomized algorithms are used in a significant percentage of papers but that, in most cases, randomness is not properly accounted for. This casts doubts on the validity of most empirical results assessing randomized algorithms. There are numerous statistical tests, based on different assumptions, and it is not always clear when and how to use these tests. We hence provide practical guidelines to support empirical research on randomized algorithms in software engineering.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 33rd {International} {Conference} on {Software} {Engineering}},
	publisher = {ACM Press},
	author = {Arcuri, Andrea and Briand, Lionel},
	year = {2011},
	pages = {1--10},
	file = {Arcuri and Briand - 2011 - A practical guide for using statistical tests to a.pdf:/home/fmind/Documents/Zotero/storage/2YN8IC4P/Arcuri and Briand - 2011 - A practical guide for using statistical tests to a.pdf:application/pdf}
}

@misc{sebastiani_tutorial_2010,
	title = {A {Tutorial} on {Probability} {Theory}},
	language = {en},
	author = {Sebastiani, Paola},
	year = {2010},
	pagetotal = {25},
	file = {Sebastiani - A Tutorial on Probability Theory.pdf:/home/fmind/Documents/Zotero/storage/PHB5FNPZ/Sebastiani - A Tutorial on Probability Theory.pdf:application/pdf}
}

@incollection{hutchison_firma:_2013,
	address = {Berlin, Heidelberg},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {{FIRMA}: {Malware} {Clustering} and {Network} {Signature} {Generation} with {Mixed} {Network} {Behaviors}},
	volume = {8145},
	isbn = {978-3-642-41283-7 978-3-642-41284-4},
	shorttitle = {{FIRMA}},
	url = {http://link.springer.com/10.1007/978-3-642-41284-4_8},
	abstract = {The ever-increasing number of malware families and polymorphic variants creates a pressing need for automatic tools to cluster the collected malware into families and generate behavioral signatures for their detection. Among these, network traffic is a powerful behavioral signature and network signatures are widely used by network administrators. In this paper we present FIRMA, a tool that given a large pool of network traffic obtained by executing unlabeled malware binaries, generates a clustering of the malware binaries into families and a set of network signatures for each family. Compared with prior tools, FIRMA produces network signatures for each of the network behaviors of a family, regardless of the type of traffic the malware uses (e.g., HTTP, IRC, SMTP, TCP, UDP). We have implemented FIRMA and evaluated it on two recent datasets comprising nearly 16,000 unique malware binaries. Our results show that FIRMA’s clustering has very high precision (100\% on a labeled dataset) and recall (97.7\%). We compare FIRMA’s signatures with manually generated ones, showing that they are as good (often better), while generated in a fraction of the time.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Research in {Attacks}, {Intrusions}, and {Defenses}},
	publisher = {Springer Berlin Heidelberg},
	author = {Rafique, M. Zubair and Caballero, Juan},
	editor = {Stolfo, Salvatore J. and Stavrou, Angelos and Wright, Charles V.},
	year = {2013},
	doi = {10.1007/978-3-642-41284-4_8},
	pages = {144--163},
	file = {Rafique and Caballero - 2013 - FIRMA Malware Clustering and Network Signature Ge.pdf:/home/fmind/Documents/Zotero/storage/RR8XRIDK/Rafique and Caballero - 2013 - FIRMA Malware Clustering and Network Signature Ge.pdf:application/pdf}
}

@inproceedings{perdisci_behavioral_2010,
	title = {Behavioral {Clustering} of {HTTP}-{Based} {Malware} and {Signature} {Generation} {Using} {Malicious} {Network} {Traces}},
	abstract = {We present a novel network-level behavioral malware clustering system. We focus on analyzing the structural similarities among malicious HTTP traffic traces generated by executing HTTP-based malware. Our work is motivated by the need to provide quality input to algorithms that automatically generate network signatures. Accordingly, we define similarity metrics among HTTP traces and develop our system so that the resulting clusters can yield high-quality malware signatures.},
	language = {en},
	booktitle = {Proceedings of the 7th {USENIX} {Symposium} on {Networked} {Systems} {Design} and {Implementation} ({NSDI})},
	publisher = {USENIX Association},
	author = {Perdisci, Roberto and Lee, Wenke and Feamster, Nick},
	month = apr,
	year = {2010},
	pagetotal = {14},
	file = {Perdisci et al. - Behavioral Clustering of HTTP-Based Malware and Si.pdf:/home/fmind/Documents/Zotero/storage/NW8A3IMF/Perdisci et al. - Behavioral Clustering of HTTP-Based Malware and Si.pdf:application/pdf}
}

@article{le_traon_design_2006,
	title = {Design by {Contract} to {Improve} {Software} {Vigilance}},
	volume = {32},
	issn = {0098-5589, 1939-3520},
	url = {http://ieeexplore.ieee.org/document/1703388/},
	doi = {10.1109/TSE.2006.79},
	abstract = {Design by Contract is a lightweight technique for embedding elements of formal specification (such as invariants, pre and postconditions) into an object-oriented design. When contracts are made executable, they can play the role of embedded, online oracles. Executable contracts allow components to be responsive to erroneous states and, thus, may help in detecting and locating faults. In this paper, we define Vigilance as the degree to which a program is able to detect an erroneous state at runtime. Diagnosability represents the effort needed to locate a fault once it has been detected. In order to estimate the benefit of using Design by Contract, we formalize both notions of Vigilance and Diagnosability as software quality measures. The main steps of measure elaboration are given, from informal definitions of the factors to be measured to the mathematical model of the measures. As is the standard in this domain, the parameters are then fixed through actual measures, based on a mutation analysis in our case. Several measures are presented that reveal and estimate the contribution of contracts to the overall quality of a system in terms of vigilance and diagnosability.},
	language = {en},
	number = {8},
	urldate = {2018-04-10},
	journal = {IEEE Transactions on Software Engineering},
	author = {Le Traon, Yves and Baudry, Benoit and Jézéquel, Jean-Marc},
	month = aug,
	year = {2006},
	pages = {571--586},
	file = {Le Traon et al. - 2006 - Design by Contract to Improve Software Vigilance.pdf:/home/fmind/Documents/Zotero/storage/43C487W2/Le Traon et al. - 2006 - Design by Contract to Improve Software Vigilance.pdf:application/pdf}
}

@article{jacobs_pathologies_2009,
	title = {The {Pathologies} of {Big} {Data}},
	volume = {7},
	language = {en},
	number = {6},
	journal = {ACM Queue},
	author = {Jacobs, Adam},
	month = jul,
	year = {2009},
	pagetotal = {12},
	file = {Jacobs - The Pathologies of Big Data.pdf:/home/fmind/Documents/Zotero/storage/X34LBYUD/Jacobs - The Pathologies of Big Data.pdf:application/pdf}
}

@techreport{white_evolving_2011,
	type = {White {Paper}},
	title = {The {Evolving} {Role} of the {Enterprise} {Data} {Warehouse} in the {Era} of {Big} {Data} {Analytics}},
	language = {en},
	institution = {Kimball Group},
	author = {Kimball, Ralph},
	year = {2011},
	pagetotal = {33},
	file = {White and Kimball - The Evolving Role of the Enterprise Data Warehouse.pdf:/home/fmind/Documents/Zotero/storage/TM3K9ZRM/White and Kimball - The Evolving Role of the Enterprise Data Warehouse.pdf:application/pdf}
}

@article{provost_data_2013,
	title = {Data {Science} and its {Relationship} to {Big} {Data} and {Data}-{Driven} {Decision} {Making}},
	volume = {1},
	issn = {2167-6461, 2167-647X},
	url = {http://online.liebertpub.com/doi/10.1089/big.2013.1508},
	doi = {10.1089/big.2013.1508},
	abstract = {Companies have realized they need to hire data scientists, academic institutions are scrambling to put together data-science programs, and publications are touting data science as a hot—even “sexy”—career choice. However, there is confusion about what exactly data science is, and this confusion could lead to disillusionment as the concept diffuses into meaningless buzz. In this article, we argue that there are good reasons why it has been hard to pin down exactly what is data science. One reason is that data science is intricately intertwined with other important concepts also of growing importance, such as big data and data-driven decision making. Another reason is the natural tendency to associate what a practitioner does with the definition of the practitioner’s field; this can result in overlooking the fundamentals of the field. We believe that trying to define the boundaries of data science precisely is not of the utmost importance. We can debate the boundaries of the field in an academic setting, but in order for data science to serve business effectively, it is important (i) to understand its relationships to other important related concepts, and (ii) to begin to identify the fundamental principles underlying data science. Once we embrace (ii), we can much better understand and explain exactly what data science has to offer. Furthermore, only once we embrace (ii) should we be comfortable calling it data science. In this article, we present a perspective that addresses all these concepts. We close by offering, as examples, a partial list of fundamental principles underlying data science.},
	language = {en},
	number = {1},
	urldate = {2018-04-10},
	journal = {Big Data},
	author = {Provost, Foster and Fawcett, Tom},
	month = mar,
	year = {2013},
	pages = {51--59},
	file = {Provost and Fawcett - 2013 - Data Science and its Relationship to Big Data and .pdf:/home/fmind/Documents/Zotero/storage/AVAWY5CS/Provost and Fawcett - 2013 - Data Science and its Relationship to Big Data and .pdf:application/pdf}
}

@article{nigam_text_2000,
	title = {Text {Classification} from {Labeled} and {Unlabeled} {Documents} using {EM}},
	volume = {39},
	doi = {10.1023/A:1007692713085},
	abstract = {This paper shows that the accuracy of learned text classifiers can be improved by augmenting a small number of labeled training documents with a large pool of unlabeled documents. This is important because in many text classification problems obtaining training labels is expensive, while large quantities of unlabeled documents are readily available.},
	language = {en},
	number = {2--3},
	journal = {Machine Learning},
	author = {Nigam, Kamal and McCallum, Andrew Kachites and Thrun, Sebastian and Mitchell, Tom},
	year = {2000},
	pages = {103--134},
	file = {NIGAM - Text Classification from Labeled and Unlabeled Doc.pdf:/home/fmind/Documents/Zotero/storage/BXWGF59D/NIGAM - Text Classification from Labeled and Unlabeled Doc.pdf:application/pdf}
}

@misc{goethals_survey_2003,
	title = {Survey on {Frequent} {Pattern} {Mining}},
	language = {en},
	author = {Goethals, Bart},
	year = {2003},
	pagetotal = {43},
	file = {Goethals - Survey on Frequent Pattern Mining.pdf:/home/fmind/Documents/Zotero/storage/2E7KZRXB/Goethals - Survey on Frequent Pattern Mining.pdf:application/pdf}
}

@inproceedings{melnik_similarity_2002,
	title = {Similarity flooding: a versatile graph matching algorithm and its application to schema matching},
	isbn = {978-0-7695-1531-1},
	shorttitle = {Similarity flooding},
	url = {http://ieeexplore.ieee.org/document/994702/},
	doi = {10.1109/ICDE.2002.994702},
	abstract = {Matching elements of two data schemas or two data instances plays a key role in data warehousing, e-business, or even biochemical applications. In this paper we present a matching algorithm based on a fixpoint computation that is usable across different scenarios. The algorithm takes two graphs (schemas, catalogs, or other data structures) as input, and produces as output a mapping between corresponding nodes of the graphs. Depending on the matching goal, a subset of the mapping is chosen using filters. After our algorithm runs, we expect a human to check and if necessary adjust the results. As a matter of fact, we evaluate the ‘accuracy’ of the algorithm by counting the number of needed adjustments. We conducted a user study, in which our accuracy metric was used to estimate the labor savings that the users could obtain by utilizing our algorithm to obtain an initial matching. Finally, we illustrate how our matching algorithm is deployed as one of several high-level operators in an implemented testbed for managing information models and mappings.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings 18th {International} {Conference} on {Data} {Engineering}},
	publisher = {IEEE Computer Society},
	author = {Melnik, Sergey and Garcia-Molina, Hector and Rahm, Erhard},
	year = {2002},
	pages = {117--128},
	file = {Melnik et al. - 2002 - Similarity flooding a versatile graph matching al.pdf:/home/fmind/Documents/Zotero/storage/NAUG8JN3/Melnik et al. - 2002 - Similarity flooding a versatile graph matching al.pdf:application/pdf}
}

@inproceedings{wang_semap:_nodate,
	title = {{SeMap}: {A} {Generic} {Mapping} {Construction} {System}},
	abstract = {Most previous schema mapping works focus on creating mappings in specific data models for data transformation, failing to capture a richer set of possible relationships between schema elements. For example, most schema matching approaches might discover that ‘TA’ in one schema equals ‘grad TA’ in another one, even though the relationship can be modeled more accurately by saying that ‘grad TA’ is a specialization of ‘TA’. Deepening the mapping semantics in turn allow richer application semantics. This paper presents and proves the effectiveness of SeMap, a system that constructs a complex, semantically richer mapping (including ‘Has-a’, ‘Is-a’, ‘Associates’ and ‘Equivalent’ relationship types) that can be used across data models. We achieve this goal by: (1) exploiting semantic evidence for possible matches; (2) finding a globally optimal match assignment; (3) identifying the relationship embedded in the selected matches. We implemented our semantic matching approach within a prototype system, SeMap, and showed its accuracy and effectiveness.},
	language = {en},
	booktitle = {Proceedings of the 11th {International} {Conference} on {Extending} {Database} {Technology} ({EDBT})},
	author = {Wang, Ting and Pottinger, Rachel},
	year = {2008},
	pages = {97--108},
	file = {Wang and Pottinger - SeMap A Generic Mapping Construction System.pdf:/home/fmind/Documents/Zotero/storage/HI8AIKC3/Wang and Pottinger - SeMap A Generic Mapping Construction System.pdf:application/pdf}
}

@inproceedings{ganjam_robust_2003,
	title = {Robust and {Efficient} {Fuzzy} {Match} for {Online} {Data} {Cleaning}},
	doi = {10.1145/872757.872796},
	abstract = {To ensure high data quality, data warehouses must validate and cleanse incoming data tuples from external sources. In many situations, clean tuples must match acceptable tuples in reference tables. For example, product name and description fields in a sales record from a distributor must match the pre-recorded name and description fields in a product reference relation.},
	language = {en},
	booktitle = {Proceedings of the 2003 {ACM} {SIGMOD} {International} {Conference} on {Management} of {Data}},
	publisher = {ACM},
	author = {Chaudhuri, Surajit and Ganjam, Kris and Ganti, Venkatesh and Motwani, Rajeev},
	month = jun,
	year = {2003},
	pages = {313--324},
	file = {Ganjam et al. - Robust and Efficient Fuzzy Match for Online Data C.pdf:/home/fmind/Documents/Zotero/storage/4Q3JMF2Y/Ganjam et al. - Robust and Efficient Fuzzy Match for Online Data C.pdf:application/pdf}
}

@article{newcombe_record_1962,
	title = {Record linkage: making maximum use of the discriminating power of identifying information},
	volume = {5},
	issn = {0001-0782},
	shorttitle = {Record linkage},
	url = {http://portal.acm.org/citation.cfm?doid=368996.369026},
	doi = {10.1145/368996.369026},
	language = {en},
	number = {11},
	urldate = {2018-04-10},
	journal = {Communications of the ACM},
	author = {Newcombe, Howard B. and Kennedy, James M.},
	month = nov,
	year = {1962},
	pages = {563--566},
	file = {Newcombe and Kennedy - 1962 - Record linkage making maximum use of the discrimi.pdf:/home/fmind/Documents/Zotero/storage/XEVMCY84/Newcombe and Kennedy - 1962 - Record linkage making maximum use of the discrimi.pdf:application/pdf}
}

@inproceedings{raykar_ranking_2011,
	title = {Ranking annotators for crowdsourced labeling tasks},
	abstract = {With the advent of crowdsourcing services it has become quite cheap and reasonably effective to get a dataset labeled by multiple annotators in a short amount of time. Various methods have been proposed to estimate the consensus labels by correcting for the bias of annotators with different kinds of expertise. Often we have low quality annotators or spammers–annotators who assign labels randomly (e.g., without actually looking at the instance). Spammers can make the cost of acquiring labels very expensive and can potentially degrade the quality of the consensus labels. In this paper we formalize the notion of a spammer and define a score which can be used to rank the annotators—with the spammers having a score close to zero and the good annotators having a high score close to one.},
	language = {en},
	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 24},
	author = {Raykar, Vikas C. and Yu, Shipeng},
	year = {2011},
	pages = {1809--1817},
	file = {Raykar and Yu - Ranking annotators for crowdsourced labeling tasks.pdf:/home/fmind/Documents/Zotero/storage/SZPQ38JR/Raykar and Yu - Ranking annotators for crowdsourced labeling tasks.pdf:application/pdf}
}

@inproceedings{sheng_get_2008,
	title = {Get another label? improving data quality and data mining using multiple, noisy labelers},
	isbn = {978-1-60558-193-4},
	shorttitle = {Get another label?},
	url = {http://dl.acm.org/citation.cfm?doid=1401890.1401965},
	doi = {10.1145/1401890.1401965},
	abstract = {This paper addresses the repeated acquisition of labels for data items when the labeling is imperfect. We examine the improvement (or lack thereof) in data quality via repeated labeling, and focus especially on the improvement of training labels for supervised induction. With the outsourcing of small tasks becoming easier, for example via Rent-A-Coder or Amazon’s Mechanical Turk, it often is possible to obtain less-than-expert labeling at low cost. With low-cost labeling, preparing the unlabeled part of the data can become considerably more expensive than labeling. We present repeated-labeling strategies of increasing complexity, and show several main results. (i) Repeated-labeling can improve label quality and model quality, but not always. (ii) When labels are noisy, repeated labeling can be preferable to single labeling even in the traditional setting where labels are not particularly cheap. (iii) As soon as the cost of processing the unlabeled data is not free, even the simple strategy of labeling everything multiple times can give considerable advantage. (iv) Repeatedly labeling a carefully chosen set of points is generally preferable, and we present a robust technique that combines different notions of uncertainty to select data points for which quality should be improved. The bottom line: the results show clearly that when labeling is not perfect, selective acquisition of multiple labels is a strategy that data miners should have in their repertoire; for certain label-quality/cost regimes, the benefit is substantial.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 14th {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
	publisher = {ACM Press},
	author = {Sheng, Victor S. and Provost, Foster and Ipeirotis, Panagiotis G.},
	year = {2008},
	pages = {614--622},
	file = {Sheng et al. - 2008 - Get another label improving data quality and data.pdf:/home/fmind/Documents/Zotero/storage/YU5Z46CL/Sheng et al. - 2008 - Get another label improving data quality and data.pdf:application/pdf}
}

@inproceedings{madhavan_generic_2011,
	title = {Generic {Schema} {Matching} with {Cupid}},
	abstract = {Schema matching is a critical step in many applications, such as XML message mapping, data warehouse loading, and schema integration. In this paper, we investigate algorithms for generic schema matching, outside of any particular data model or application. We first present a taxonomy for past solutions, showing that a rich range of techniques is available. We then propose a new algorithm, Cupid, that discovers mappings between schema elements based on their names, data types, constraints, and schema structure, using a broader set of techniques than past approaches. Some of our innovations are the integrated use of linguistic and structural matching, context-dependent matching of shared types, and a bias toward leaf structure where much of the schema content resides. After describing our algorithm, we present experimental results that compare Cupid to two other schema matching systems.},
	language = {en},
	booktitle = {Proceedings of the 27th {International} {Conference} on {Very} {Large} {Data} {Bases} ({VLDB})},
	author = {Madhavan, Jayant and Bernstein, Philip A. and Rahm, Erhard},
	year = {2001},
	pages = {49--58},
	file = {Madhavan et al. - Generic Schema Matching with Cupid.pdf:/home/fmind/Documents/Zotero/storage/FNAIPZRT/Madhavan et al. - Generic Schema Matching with Cupid.pdf:application/pdf}
}

@article{bernstein_generic_2011,
	title = {Generic {Schema} {Matching}, {Ten} {Years} {Later}},
	volume = {4},
	abstract = {In a paper published in the 2001 VLDB Conference, we proposed treating generic schema matching as an independent problem. We developed a taxonomy of existing techniques, a new schema matching algorithm, and an approach to comparative evaluation. Since then, the field has grown into a major research topic. We briefly summarize the new techniques that have been developed and applications of the techniques in the commercial world. We conclude by discussing future trends and recommendations for further work.},
	language = {en},
	number = {11},
	journal = {Proceedings of the VLDB Endowment},
	author = {Bernstein, Philip A. and Madhavan, Jayant and Rahm, Erhard},
	year = {2011},
	pages = {695--701},
	file = {Bernstein et al. - Generic Schema Matching, Ten Years Later.pdf:/home/fmind/Documents/Zotero/storage/AEYITLWA/Bernstein et al. - Generic Schema Matching, Ten Years Later.pdf:application/pdf}
}

@incollection{goos_discovering_1999,
	address = {Berlin, Heidelberg},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Discovering {Frequent} {Closed} {Itemsets} for {Association} {Rules}},
	volume = {1540},
	isbn = {978-3-540-65452-0 978-3-540-49257-3},
	url = {http://link.springer.com/10.1007/3-540-49257-7_25},
	abstract = {In this paper, we address the problem of finding frequent itemsets in a database. Using the closed itemset lattice framework, we show that this problem can be reduced to the problem of finding frequent closed itemsets. Based on this statement, we can construct efficient data mining algorithms by limiting the search space to the closed itemset lattice rather than the subset lattice. Moreover, we show that the set of all frequent closed itemsets suffices to determine a reduced set of association rules, thus addressing another important data mining problem: limiting the number of rules produced without information loss. We propose a new algorithm, called A-Close, using a closure mechanism to find frequent closed itemsets. We realized experiments to compare our approach to the commonly used frequent itemset search approach. Those experiments showed that our approach is very valuable for dense and/or correlated data that represent an important part of existing databases.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Database {Theory} — {ICDT}’99},
	publisher = {Springer Berlin Heidelberg},
	author = {Pasquier, Nicolas and Bastide, Yves and Taouil, Rafik and Lakhal, Lotfi},
	editor = {Beeri, Catriel and Buneman, Peter},
	year = {1999},
	doi = {10.1007/3-540-49257-7_25},
	pages = {398--416},
	file = {Pasquier et al. - 1999 - Discovering Frequent Closed Itemsets for Associati.pdf:/home/fmind/Documents/Zotero/storage/GTC3VGR2/Pasquier et al. - 1999 - Discovering Frequent Closed Itemsets for Associati.pdf:application/pdf}
}

@inproceedings{ramakrishnan_model_2005,
	title = {A model for handling approximate, noisy or incomplete labeling in text classification},
	isbn = {978-1-59593-180-1},
	url = {http://portal.acm.org/citation.cfm?doid=1102351.1102437},
	doi = {10.1145/1102351.1102437},
	abstract = {We introduce a Bayesian model, BayesANIL, that is capable of estimating uncertainties associated with the labeling process. Given a labeled or partially labeled training corpus of text documents, the model estimates the joint distribution of training documents and class labels by using a generalization of the Expectation Maximization algorithm. The estimates can be used in standard classification models to reduce error rates. Since uncertainties in the labeling are taken into account, the model provides an elegant mechanism to deal with noisy labels. We provide an intuitive modification to the EM iterations by re-estimating the empirical distribution in order to reinforce feature values in unlabeled data and to reduce the influence of noisily labeled examples. Considerable improvement in the classification accuracies of two popular classification algorithms on standard labeled data-sets with and without artificially introduced noise, as well as in the presence and absence of unlabeled data, indicates that this may be a promising method to reduce the burden of manual labeling.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 22nd {International} {Conference} on {Machine} {Learning} ({ICML} '05)},
	publisher = {ACM Press},
	author = {Ramakrishnan, Ganesh and Chitrapura, Krishna Prasad and Krishnapuram, Raghu and Bhattacharyya, Pushpak},
	year = {2005},
	pages = {681--688},
	file = {Ramakrishnan et al. - 2005 - A model for handling approximate, noisy or incompl.pdf:/home/fmind/Documents/Zotero/storage/PKJXVJQ5/Ramakrishnan et al. - 2005 - A model for handling approximate, noisy or incompl.pdf:application/pdf}
}

@techreport{serenko_agent_2002,
	type = {Working {Paper}},
	title = {Agent {Toolkits}: {A} {General} {Overview} of the {Market} and an {Assessment} of {Instructor} {Satisfaction} with {Utilizing} {Toolkits} in the {Classroom}},
	language = {en},
	number = {455},
	institution = {Michael G. DeGroote School of Business, McMaster University},
	author = {Serenko, Alexander and Detlor, Brian},
	month = jul,
	year = {2002},
	pagetotal = {49},
	file = {Serenko and Detlor - AGENT TOOLKITS A GENERAL OVERVIEW OF THE MARKET A.pdf:/home/fmind/Documents/Zotero/storage/SCUD56QP/Serenko and Detlor - AGENT TOOLKITS A GENERAL OVERVIEW OF THE MARKET A.pdf:application/pdf;Serenko and Detlor - AGENT TOOLKITS A GENERAL OVERVIEW OF THE MARKET A.pdf:/home/fmind/Documents/Zotero/storage/DZTW7TCV/Serenko and Detlor - AGENT TOOLKITS A GENERAL OVERVIEW OF THE MARKET A.pdf:application/pdf}
}

@inproceedings{kemp_problems_1993,
	title = {Problems in expert systems development},
	isbn = {978-0-8186-4260-9},
	url = {http://ieeexplore.ieee.org/document/323053/},
	doi = {10.1109/ANNES.1993.323053},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings 1993 {The} {First} {New} {Zealand} {International} {Two}-{Stream} {Conference} on {Artificial} {Neural} {Networks} and {Expert} {Systems}},
	publisher = {IEEE Computer Society Press},
	author = {Kemp, E. A.},
	year = {1993},
	pages = {166--167},
	file = {Kemp - 1993 - Problems in expert systems development.pdf:/home/fmind/Documents/Zotero/storage/54MCTFH9/Kemp - 1993 - Problems in expert systems development.pdf:application/pdf}
}

@inproceedings{russell_learning_1998,
	title = {Learning agents for uncertain environments (extended abstract)},
	isbn = {978-1-58113-057-7},
	url = {http://portal.acm.org/citation.cfm?doid=279943.279964},
	doi = {10.1145/279943.279964},
	abstract = {This talk proposes a very simple “baseline architecture” for a learning agent that can handle stochastic, partially observable environments. The architecture uses reinforcement learning together with a method for representing temporal processes as graphical models. I will discuss methods for learning the parameters and structure of such representations from sensory inputs, and for computing posterior probabilities. Some open problems remain before we can try out the complete agent; more arise when we consider scaling up.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Eleventh} {Annual} {Conference} on {Computational} {Learning} {Theory} ({COLT} '98)},
	publisher = {ACM Press},
	author = {Russell, Stuart},
	year = {1998},
	pages = {101--103},
	file = {Russell - 1998 - Learning agents for uncertain environments (extend.pdf:/home/fmind/Documents/Zotero/storage/I6AM7VUT/Russell - 1998 - Learning agents for uncertain environments (extend.pdf:application/pdf}
}

@article{mnih_human-level_2015,
	author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
	title = {Human-level control through deep reinforcement learning},
	journal = {Nature},
	volume = {518},
	number = {7540},
	pages = {529--533},
	month = feb,
	year = {2015},
	issn = {0028-0836, 1476-4687},
	doi = {10.1038/nature14236},
	url = {http://www.nature.com/articles/nature14236},
	urldate = {2018-04-10},
	language = {en},
	file = {Mnih et al. - 2015 - Human-level control through deep reinforcement lea.pdf:/home/fmind/Documents/Zotero/storage/HQ9AYK5F/Mnih et al. - 2015 - Human-level control through deep reinforcement lea.pdf:application/pdf}
}

@misc{laukien_feynman_2016,
	title = {Feynman {Machine}: {The} {Universal} {Dynamical} {Systems} {Computer}},
	url = {http://arxiv.org/abs/1609.03971},
	abstract = {Efforts at understanding the computational processes in the brain have met with limited success, despite their importance and potential uses in building intelligent machines. We propose a simple new model which draws on recent findings in Neuroscience and the Applied Mathematics of interacting Dynamical Systems. The Feynman Machine is a Universal Computer for Dynamical Systems, analogous to the Turing Machine for symbolic computing, but with several important differences. We demonstrate that networks and hierarchies of simple interacting Dynamical Systems, each adaptively learning to forecast its evolution, are capable of automatically building sensorimotor models of the external and internal world. We identify such networks in mammalian neocortex, and show how existing theories of cortical computation combine with our model to explain the power and flexibility of mammalian intelligence. These findings lead directly to new architectures for machine intelligence. A suite of software implementations has been built based on these principles, and applied to a number of spatiotemporal learning tasks.},
	language = {en},
	eprint = {1609.03971},
	eprinttype = {arXiv},
	author = {Laukien, Eric and Crowder, Richard and Byrne, Fergal},
	month = sep,
	year = {2016},
	pagetotal = {28},
	file = {Laukien et al. - Feynman Machine The Universal Dynamical Systems C.pdf:/home/fmind/Documents/Zotero/storage/QKDM8WS6/Laukien et al. - Feynman Machine The Universal Dynamical Systems C.pdf:application/pdf}
}

@article{kravari_survey_2015,
	title = {A {Survey} of {Agent} {Platforms}},
	volume = {18},
	issn = {1460-7425},
	url = {http://jasss.soc.surrey.ac.uk/18/1/11.html},
	doi = {10.18564/jasss.2661},
	abstract = {From computer games to human societies, many natural and artificial phenomena can be represented as multi-agent systems. Over time, these systems have been proven a really powerful tool for modelling and understanding phenomena in fields, such as economics and trading, health care, urban planning and social sciences. However, although, intelligent agents have been around for years, their actual implementation is still in its early stages. Since the late nineties many agent platforms have been developed. Some of them have already been abandoned whereas others continue releasing new versions. On the other hand, the agent-oriented research community is still providing more and more new platforms. This vast amount of platform options leads to a high degree of heterogeneity. Hence, a common problem is how people interested in using multi-agent systems should choose which platform to use in order to benefit from agent technology. This decision was usually left to word of mouth, past experiences or platform publicity, lately however people depend on solid survey articles. To date, in most cases multiagent system surveys describe only the basic characteristics of a few representatives without even providing any classification of the systems themselves. This article presents a comparative up-to-date review of the most promising existing agent platforms that can be used. It is based on universal comparison and evaluation criteria, proposing classifications for helping readers to understand which agent platforms broadly exhibit similar properties and in which situations which choices should be made.},
	language = {en},
	number = {1},
	urldate = {2018-04-10},
	journal = {Journal of Artificial Societies and Social Simulation},
	author = {Kravari, Kalliopi and Bassiliades, Nick},
	year = {2015},
	pages = {11},
	file = {Kravari and Bassiliades - 2015 - A Survey of Agent Platforms.pdf:/home/fmind/Documents/Zotero/storage/FIVUWFV5/Kravari and Bassiliades - 2015 - A Survey of Agent Platforms.pdf:application/pdf}
}

@inproceedings{abbeel_apprenticeship_2004,
	title = {Apprenticeship learning via inverse reinforcement learning},
	url = {http://portal.acm.org/citation.cfm?doid=1015330.1015430},
	doi = {10.1145/1015330.1015430},
	abstract = {We consider learning in a Markov decision process where we are not explicitly given a reward function, but where instead we can observe an expert demonstrating the task that we want to learn to perform. This setting is useful in applications (such as the task of driving) where it may be difficult to write down an explicit reward function specifying exactly how different desiderata should be traded off. We think of the expert as trying to maximize a reward function that is expressible as a linear combination of known features, and give an algorithm for learning the task demonstrated by the expert. Our algorithm is based on using “inverse reinforcement learning” to try to recover the unknown reward function. We show that our algorithm terminates in a small number of iterations, and that even though we may never recover the expert’s reward function, the policy output by the algorithm will attain performance close to that of the expert, where here performance is measured with respect to the expert’s unknown reward function.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Twenty}-{First} {International} {Conference} on {Machine} {Learning} ({ICML} '04)},
	publisher = {ACM Press},
	author = {Abbeel, Pieter and Ng, Andrew Y.},
	year = {2004},
	pages = {1},
	file = {Abbeel and Ng - 2004 - Apprenticeship learning via inverse reinforcement .pdf:/home/fmind/Documents/Zotero/storage/TCMT4493/Abbeel and Ng - 2004 - Apprenticeship learning via inverse reinforcement .pdf:application/pdf}
}

@inproceedings{chen_xgboost:_2016,
	title = {{XGBoost}: {A} {Scalable} {Tree} {Boosting} {System}},
	isbn = {978-1-4503-4232-2},
	shorttitle = {{XGBoost}},
	url = {http://dl.acm.org/citation.cfm?doid=2939672.2939785},
	doi = {10.1145/2939672.2939785},
	abstract = {Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examples using far fewer resources than existing systems.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining} ({KDD} '16)},
	publisher = {ACM Press},
	author = {Chen, Tianqi and Guestrin, Carlos},
	year = {2016},
	pages = {785--794},
	file = {Chen and Guestrin - 2016 - XGBoost A Scalable Tree Boosting System.pdf:/home/fmind/Documents/Zotero/storage/NA3J5I3L/Chen and Guestrin - 2016 - XGBoost A Scalable Tree Boosting System.pdf:application/pdf}
}

@techreport{mitchell_need_1980,
	address = {New Brunswick, NJ},
	title = {The {Need} for {Biases} in {Learning} {Generalizations}},
	language = {en},
	number = {CBM-TR-117},
	institution = {Department of Computer Science, Rutgers University},
	author = {Mitchell, Tom M.},
	year = {1980},
	pagetotal = {3},
	file = {Mitchell - The Need for Biases in Learning Generalizations.pdf:/home/fmind/Documents/Zotero/storage/YEJHM622/Mitchell - The Need for Biases in Learning Generalizations.pdf:application/pdf}
}

@misc{lipton_mythos_2016,
	title = {The {Mythos} of {Model} {Interpretability}},
	url = {http://arxiv.org/abs/1606.03490},
	abstract = {Supervised machine learning models boast remarkable predictive capabilities. But can you trust your model? Will it work in deployment? What else can it tell you about the world? We want models to be not only good, but interpretable. And yet the task of interpretation appears underspecified. Papers provide diverse and sometimes non-overlapping motivations for interpretability, and offer myriad notions of what attributes render models interpretable. Despite this ambiguity, many papers proclaim interpretability axiomatically, absent further explanation. In this paper, we seek to refine the discourse on interpretability. First, we examine the motivations underlying interest in interpretability, finding them to be diverse and occasionally discordant. Then, we address model properties and techniques thought to confer interpretability, identifying transparency to humans and post-hoc explanations as competing notions. Throughout, we discuss the feasibility and desirability of different notions, and question the oft-made assertions that linear models are interpretable and that deep neural networks are not.},
	language = {en},
	eprint = {1606.03490},
	eprinttype = {arXiv},
	eprintclass = {cs.LG},
	author = {Lipton, Zachary C.},
	month = jun,
	year = {2016},
	pagetotal = {5},
	file = {Lipton - The Mythos of Model Interpretability.pdf:/home/fmind/Documents/Zotero/storage/JN9VXRRP/Lipton - The Mythos of Model Interpretability.pdf:application/pdf}
}

@article{xu_survey_2005,
	title = {Survey of {Clustering} {Algorithms}},
	volume = {16},
	issn = {1045-9227},
	url = {http://ieeexplore.ieee.org/document/1427769/},
	doi = {10.1109/TNN.2005.845141},
	abstract = {Data analysis plays an indispensable role for understanding various phenomena. Cluster analysis, primitive exploration with little or no prior knowledge, consists of research developed across a wide variety of communities. The diversity, on one hand, equips us with many tools. On the other hand, the profusion of options causes confusion. We survey clustering algorithms for data sets appearing in statistics, computer science, and machine learning, and illustrate their applications in some benchmark data sets, the traveling salesman problem, and bioinformatics, a new field attracting intensive efforts. Several tightly related topics, proximity measure, and cluster validation, are also discussed.},
	language = {en},
	number = {3},
	urldate = {2018-04-10},
	journal = {IEEE Transactions on Neural Networks},
	author = {Xu, Rui and Wunsch, II, Donald},
	month = may,
	year = {2005},
	pages = {645--678},
	file = {Xu and WunschII - 2005 - Survey of Clustering Algorithms.pdf:/home/fmind/Documents/Zotero/storage/TPHYWA6N/Xu and WunschII - 2005 - Survey of Clustering Algorithms.pdf:application/pdf}
}

@article{kotsiantis_supervised_2007,
	title = {Supervised {Machine} {Learning}: {A} {Review} of {Classification} {Techniques}},
	volume = {31},
	language = {en},
	number = {3},
	journal = {Informatica},
	author = {Kotsiantis, Sotiris B.},
	month = jul,
	year = {2007},
	pages = {249--268},
	file = {Kotsiantis - Supervised Machine Learning A Review of Classific.pdf:/home/fmind/Documents/Zotero/storage/H7MF7E7F/Kotsiantis - Supervised Machine Learning A Review of Classific.pdf:application/pdf}
}

@inproceedings{lin_power_2010,
	title = {Power {Iteration} {Clustering}},
	abstract = {We present a simple and scalable graph clustering method called power iteration clustering (PIC). PIC finds a very low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise similarity matrix of the data. This embedding turns out to be an effective cluster indicator, consistently outperforming widely used spectral methods such as NCut on real datasets. PIC is very fast on large datasets, running over 1,000 times faster than an NCut implementation based on the state-of-the-art IRAM eigenvector computation technique.},
	language = {en},
	booktitle = {Proceedings of the 27th {International} {Conference} on {Machine} {Learning} ({ICML}-10)},
	author = {Lin, Frank and Cohen, William W.},
	year = {2010},
	pages = {655--662},
	file = {Lin and Cohen - Power Iteration Clustering.pdf:/home/fmind/Documents/Zotero/storage/T2WD7PV6/Lin and Cohen - Power Iteration Clustering.pdf:application/pdf}
}

@inproceedings{farber_using_2010,
	title = {On {Using} {Class}-{Labels} in {Evaluation} of {Clusterings}},
	abstract = {Although clustering has been studied for several decades, the fundamental problem of a valid evaluation has not yet been solved. The sound evaluation of clustering results in particular on real data is inherently difficult. In the literature, new clustering algorithms and their results are often externally evaluated with respect to an existing class labeling. These class-labels, however, may not be adequate for the structure of the data or the evaluated cluster model. Here, we survey the literature of different related research areas that have observed this problem. We discuss common “defects” that clustering algorithms exhibit w.r.t. this evaluation, and show them on several real world data sets of different domains along with a discussion why the detected clusters do not indicate a bad performance of the algorithm but are valid and useful results. An useful alternative evaluation method requires more extensive data labeling than the commonly used class labels or it needs a combination of information measures to take subgroups, supergroups, and overlapping sets of traditional classes into account. Finally, we discuss an evaluation scenario that regards the possible existence of several complementary sets of labels and hope to stimulate the discussion among different sub-communities — like ensemble-clustering, subspace-clustering, multi-label classification, hierarchical classification or hierarchical clustering, and multiview-clustering or alternative clustering —regarding requirements on enhanced evaluation methods.},
	language = {en},
	booktitle = {{MultiClust}: 1st {International} {Workshop} on {Discovering}, {Summarizing} and {Using} {Multiple} {Clusterings} {Held} in {Conjunction} with {KDD} 2010},
	author = {Färber, Ines and Günnemann, Stephan and Kriegel, Hans-Peter and Kröger, Peer and Müller, Emmanuel and Schubert, Erich and Seidl, Thomas and Zimek, Arthur},
	year = {2010},
	pagetotal = {9},
	file = {Färber et al. - On Using Class-Labels in Evaluation of Clusterings.pdf:/home/fmind/Documents/Zotero/storage/ZM7M9JYR/Färber et al. - On Using Class-Labels in Evaluation of Clusterings.pdf:application/pdf}
}

@article{halkidi_clustering_2001,
	title = {On {Clustering} {Validation} {Techniques}},
	volume = {17},
	doi = {10.1023/A:1012801612483},
	abstract = {Cluster analysis aims at identifying groups of similar objects and, therefore helps to discover distribution of patterns and interesting correlations in large data sets. It has been subject of wide research since it arises in many application domains in engineering, business and social sciences. Especially, in the last years the availability of huge transactional and experimental data sets and the arising requirements for data mining created needs for clustering algorithms that scale and can be applied in diverse domains.},
	language = {en},
	number = {2-3},
	journal = {Journal of Intelligent Information Systems},
	author = {Halkidi, Maria and Batistakis, Yannis and Vazirgiannis, Michalis},
	year = {2001},
	pages = {107--145},
	file = {HALKIDI - On Clustering Validation Techniques.pdf:/home/fmind/Documents/Zotero/storage/J5HN5X6X/HALKIDI - On Clustering Validation Techniques.pdf:application/pdf}
}

@inproceedings{goldberg_measuring_2010,
	title = {Measuring {Similarity} between {Sets} of {Overlapping} {Clusters}},
	isbn = {978-1-4244-8439-3},
	url = {http://ieeexplore.ieee.org/document/5591225/},
	doi = {10.1109/SocialCom.2010.50},
	abstract = {The typical task of unsupervised learning is to organize data, for example into clusters, typically disjoint clusters (eg. the K-means algorithm). One would expect (for example) a clustering of books into topics to present overlapping clusters. The situation is even more so in social networks, a source of ever increasing data. Finding the groups or communities in social networks based on interactions between individuals (a measure of similarity) is an unsupervised learning task; and, groups overlap – an individual can be a chess player and a violin player, in which case he would interact with members of both these groups.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2010 {IEEE} {Second} {International} {Conference} on {Social} {Computing}},
	publisher = {IEEE},
	author = {Goldberg, Mark K. and Hayvanovych, Mykola and Magdon-Ismail, Malik},
	month = aug,
	year = {2010},
	pages = {303--308},
	file = {Goldberg et al. - 2010 - Measuring Similarity between Sets of Overlapping C.pdf:/home/fmind/Documents/Zotero/storage/JZ9KZMX9/Goldberg et al. - 2010 - Measuring Similarity between Sets of Overlapping C.pdf:application/pdf}
}

@misc{kolter_linear_2008,
	title = {Linear {Algebra} {Review} and {Reference}},
	howpublished = {Course notes, CS229: Machine Learning, Stanford University},
	language = {en},
	author = {Kolter, Zico and Do, Chuong},
	month = oct,
	year = {2008},
	pagetotal = {26},
	file = {Kolter and Do - Linear Algebra Review and Reference.pdf:/home/fmind/Documents/Zotero/storage/G4T7IL7U/Kolter and Do - Linear Algebra Review and Reference.pdf:application/pdf}
}

@article{blei_latent_2016,
	title = {Latent {Dirichlet} {Allocation}},
	volume = {3},
	abstract = {We describe latent Dirichlet allocation (LDA), a generative probabilistic model for collections of discrete data such as text corpora. LDA is a three-level hierarchical Bayesian model, in which each item of a collection is modeled as a finite mixture over an underlying set of topics. Each topic is, in turn, modeled as an infinite mixture over an underlying set of topic probabilities. In the context of text modeling, the topic probabilities provide an explicit representation of a document. We present efficient approximate inference techniques based on variational methods and an EM algorithm for empirical Bayes parameter estimation. We report results in document modeling, text classification, and collaborative filtering, comparing to a mixture of unigrams model and the probabilistic LSI model.},
	language = {en},
	journal = {Journal of Machine Learning Research},
	author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
	year = {2003},
	pages = {993--1022},
	file = {Blei - Latent Dirichlet Allocation.pdf:/home/fmind/Documents/Zotero/storage/QWPQJ4YM/Blei - Latent Dirichlet Allocation.pdf:application/pdf}
}

@article{rendon_internal_2011,
	title = {Internal versus {External} cluster validation indexes},
	volume = {5},
	abstract = {One of fundamental challenges of clustering is how to evaluate results, without auxiliary information. A common approach for evaluation of clustering results is to use validity indexes. Clustering validity approaches can use three criteria: External criteria (evaluate the result with respect to a pre-specified structure), internal criteria (evaluate the result with respect a information intrinsic to the data alone). Consequently, different types of indexes are used to solve different types of problems and indexes selection depends on the kind of available information. That is why in this paper we show a comparison between external and internal indexes. Results obtained in this study indicate that internal indexes are more accurate in group determining in a given clustering structure. Six internal indexes were used in this study: BIC, CH, DB, SIL, NIVA and DUNN and four external indexes (F-measure, NMIMeasure, Entropy, Purity). The clusters that were used were obtained through clustering algorithms K-means and Bissecting-Kmeans.},
	language = {en},
	number = {1},
	journal = {International Journal of Computers and Communications},
	author = {Rendón, Eréndira and Abundez, Itzel and Arizmendi, Alejandra and Quiroz, Elvia M.},
	year = {2011},
	pages = {27--34},
	file = {Rendón et al. - 2011 - Internal versus External cluster validation indexe.pdf:/home/fmind/Documents/Zotero/storage/RYTLXQKT/Rendón et al. - 2011 - Internal versus External cluster validation indexe.pdf:application/pdf}
}

@article{lorenz_how_2011,
	title = {How social influence can undermine the wisdom of crowd effect},
	volume = {108},
	issn = {0027-8424, 1091-6490},
	url = {http://www.pnas.org/cgi/doi/10.1073/pnas.1008636108},
	doi = {10.1073/pnas.1008636108},
	language = {en},
	number = {22},
	urldate = {2018-04-10},
	journal = {Proceedings of the National Academy of Sciences},
	author = {Lorenz, Jan and Rauhut, Heiko and Schweitzer, Frank and Helbing, Dirk},
	month = may,
	year = {2011},
	pages = {9020--9025},
	file = {Lorenz et al. - 2011 - How social influence can undermine the wisdom of c.pdf:/home/fmind/Documents/Zotero/storage/5GPX7WQ8/Lorenz et al. - 2011 - How social influence can undermine the wisdom of c.pdf:application/pdf}
}

@inproceedings{kanter_deep_2015,
	title = {Deep feature synthesis: {Towards} automating data science endeavors},
	isbn = {978-1-4673-8272-4},
	shorttitle = {Deep feature synthesis},
	url = {http://ieeexplore.ieee.org/document/7344858/},
	doi = {10.1109/DSAA.2015.7344858},
	abstract = {In this paper, we develop the Data Science Machine, which is able to derive predictive models from raw data automatically. To achieve this automation, we first propose and develop the Deep Feature Synthesis algorithm for automatically generating features for relational datasets. The algorithm follows relationships in the data to a base field, and then sequentially applies mathematical functions along that path to create the final feature. Second, we implement a generalizable machine learning pipeline and tune it using a novel Gaussian Copula process based approach. We entered the Data Science Machine in 3 data science competitions that featured 906 other data science teams. Our approach beats 615 teams in these data science competitions. In 2 of the 3 competitions we beat a majority of competitors, and in the third, we achieved 94\% of the best competitor’s score. In the best case, with an ongoing competition, we beat 85.6\% of the teams and achieved 95.7\% of the top submissions score.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2015 {IEEE} {International} {Conference} on {Data} {Science} and {Advanced} {Analytics} ({DSAA})},
	publisher = {IEEE},
	author = {Kanter, James Max and Veeramachaneni, Kalyan},
	month = oct,
	year = {2015},
	pages = {1--10},
	file = {Kanter and Veeramachaneni - 2015 - Deep feature synthesis Towards automating data sc.pdf:/home/fmind/Documents/Zotero/storage/5AZFYQ85/Kanter and Veeramachaneni - 2015 - Deep feature synthesis Towards automating data sc.pdf:application/pdf}
}

@article{liao_data_2012,
	title = {Data mining techniques and applications – {A} decade review from 2000 to 2011},
	volume = {39},
	issn = {0957-4174},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0957417412003077},
	doi = {10.1016/j.eswa.2012.02.063},
	abstract = {In order to determine how data mining techniques (DMT) and their applications have developed, during the past decade, this paper reviews data mining techniques and their applications and development, through a survey of literature and the classification of articles, from 2000 to 2011. Keyword indices and article abstracts were used to identify 216 articles concerning DMT applications, from 159 academic journals (retrieved from five online databases), this paper surveys and classifies DMT, with respect to the following three areas: knowledge types, analysis types, and architecture types, together with their applications in different research and practical domains. A discussion deals with the direction of any future developments in DMT methodologies and applications: (1) DMT is finding increasing applications in expertise orientation and the development of applications for DMT is a problem-oriented domain. (2) It is suggested that different social science methodologies, such as psychology, cognitive science and human behavior might implement DMT, as an alternative to the methodologies already on offer. (3) The ability to continually change and acquire new understanding is a driving force for the application of DMT and this will allow many new future applications.},
	language = {en},
	number = {12},
	urldate = {2018-04-10},
	journal = {Expert Systems with Applications},
	author = {Liao, Shu-Hsien and Chu, Pei-Hui and Hsiao, Pei-Yuan},
	month = sep,
	year = {2012},
	pages = {11303--11311},
	file = {Liao et al. - 2012 - Data mining techniques and applications – A decade.pdf:/home/fmind/Documents/Zotero/storage/5E4T8VJ7/Liao et al. - 2012 - Data mining techniques and applications – A decade.pdf:application/pdf}
}

@article{jain_data_1999,
	title = {Data clustering: a review},
	volume = {31},
	issn = {0360-0300},
	shorttitle = {Data clustering},
	url = {http://portal.acm.org/citation.cfm?doid=331499.331504},
	doi = {10.1145/331499.331504},
	language = {en},
	number = {3},
	urldate = {2018-04-10},
	journal = {ACM Computing Surveys},
	author = {Jain, Anil K. and Murty, M. Narasimha and Flynn, Patrick J.},
	month = sep,
	year = {1999},
	pages = {264--323},
	file = {Jain et al. - 1999 - Data clustering a review.pdf:/home/fmind/Documents/Zotero/storage/C9DQVQIB/Jain et al. - 1999 - Data clustering a review.pdf:application/pdf}
}

@article{meila_comparing_2007,
	title = {Comparing clusterings—an information based distance},
	volume = {98},
	issn = {0047-259X},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0047259X06002016},
	doi = {10.1016/j.jmva.2006.11.013},
	abstract = {This paper proposes an information theoretic criterion for comparing two partitions, or clusterings, of the same data set. The criterion, called variation of information (VI), measures the amount of information lost and gained in changing from clustering C to clustering C'. The basic properties of VI are presented and discussed. We focus on two kinds of properties: (1) those that help one build intuition about the new criterion (in particular, it is shown the VI is a true metric on the space of clusterings), and (2) those that pertain to the comparability of VI values over different experimental conditions. As the latter properties have rarely been discussed explicitly before, other existing comparison criteria are also examined in their light. Finally we present the VI from an axiomatic point of view, showing that it is the only “sensible” criterion for comparing partitions that is both aligned to the lattice and convexely additive. As a consequence, we prove an impossibility result for comparing partitions: there is no criterion for comparing partitions that simultaneously satisfies the above two desirable properties and is bounded.},
	language = {en},
	number = {5},
	urldate = {2018-04-10},
	journal = {Journal of Multivariate Analysis},
	author = {Meilă, Marina},
	month = may,
	year = {2007},
	pages = {873--895},
	file = {Meilă - 2007 - Comparing clusterings—an information based distanc.pdf:/home/fmind/Documents/Zotero/storage/TQZMDAJA/Meilă - 2007 - Comparing clusterings—an information based distanc.pdf:application/pdf}
}

@inproceedings{bo_long_combining_2005,
	title = {Combining {Multiple} {Clusterings} by {Soft} {Correspondence}},
	isbn = {978-0-7695-2278-4},
	url = {http://ieeexplore.ieee.org/document/1565690/},
	doi = {10.1109/ICDM.2005.45},
	abstract = {Combining multiple clusterings arises in various important data mining scenarios. However, finding a consensus clustering from multiple clusterings is a challenging task because there is no explicit correspondence between the classes from different clusterings. We present a new framework based on soft correspondence to directly address the correspondence problem in combining multiple clusterings. Under this framework, we propose a novel algorithm that iteratively computes the consensus clustering and correspondence matrices using multiplicative updating rules. This algorithm provides a final consensus clustering as well as correspondence matrices that gives intuitive interpretation of the relations between the consensus clustering and each clustering from clustering ensembles. Extensive experimental evaluations also demonstrate the effectiveness and potential of this framework as well as the algorithm for discovering a consensus clustering from multiple clusterings.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Fifth {IEEE} {International} {Conference} on {Data} {Mining} ({ICDM}'05)},
	publisher = {IEEE},
	author = {Long, Bo and Zhang, Zhongfei and Yu, Philip S.},
	year = {2005},
	pages = {282--289},
	file = {Bo Long et al. - 2005 - Combining Multiple Clusterings by Soft Corresponde.pdf:/home/fmind/Documents/Zotero/storage/TCWA398G/Bo Long et al. - 2005 - Combining Multiple Clusterings by Soft Corresponde.pdf:application/pdf}
}

% Journal article; pages give the full range of the article, not just its first page.
@article{fowlkes_method_1983,
	title = {A {Method} for {Comparing} {Two} {Hierarchical} {Clusterings}},
	volume = {78},
	issn = {0162-1459},
	url = {https://www.jstor.org/stable/2288117?origin=crossref},
	doi = {10.2307/2288117},
	language = {en},
	number = {383},
	urldate = {2018-04-10},
	journal = {Journal of the American Statistical Association},
	author = {Fowlkes, E. B. and Mallows, C. L.},
	month = sep,
	year = {1983},
	pages = {553--569},
	file = {Fowlkes and Mallows - 1983 - A Method for Comparing Two Hierarchical Clustering.pdf:/home/fmind/Documents/Zotero/storage/ZYXRC3A2/Fowlkes and Mallows - 1983 - A Method for Comparing Two Hierarchical Clustering.pdf:application/pdf}
}

% Journal article; pages give the full range of the article, not just its first page.
@article{domingos_few_2012,
	title = {A few useful things to know about machine learning},
	volume = {55},
	issn = {0001-0782},
	url = {http://dl.acm.org/citation.cfm?doid=2347736.2347755},
	doi = {10.1145/2347736.2347755},
	abstract = {Machine learning algorithms can figure out how to perform important tasks by generalizing from examples. This is often feasible and cost-effective where manual programming is not. As more data becomes available, more ambitious problems can be tackled. As a result, machine learning is widely used in computer science and other fields. However, developing successful machine learning applications requires a substantial amount of “black art” that is hard to find in textbooks. This article summarizes twelve key lessons that machine learning researchers and practitioners have learned. These include pitfalls to avoid, important issues to focus on, and answers to common questions.},
	language = {en},
	number = {10},
	urldate = {2018-04-10},
	journal = {Communications of the ACM},
	author = {Domingos, Pedro},
	month = oct,
	year = {2012},
	pages = {78--87},
	file = {Domingos - 2012 - A few useful things to know about machine learning.pdf:/home/fmind/Documents/Zotero/storage/TUG846WW/Domingos - 2012 - A few useful things to know about machine learning.pdf:application/pdf}
}

% Web statistic. The page title (previously stored in a journal field, which misc entries ignore) is now the title; the URL is repeated in note so styles without url support still print it.
@misc{statista_statistica_2018,
	title = {Number of available applications in the {Google} {Play} {Store} from {December} 2009 to {December} 2017},
	url = {https://www.statista.com/statistics/266210/number-of-available-applications-in-the-google-play-store/},
	language = {en},
	urldate = {2018-04-01},
	author = {{Statista}},
	month = apr,
	year = {2018},
	note = {https://www.statista.com/statistics/266210/number-of-available-applications-in-the-google-play-store/}
}

% Conference paper. NOTE(review): booktitle was derived from the DOI prefix (10.14722/ndss.2016) -- confirm against the NDSS programme.
@inproceedings{xu_automatically_2016,
	title = {Automatically {Evading} {Classifiers}: {A} {Case} {Study} on {PDF} {Malware} {Classifiers}},
	isbn = {978-1-891562-41-9},
	shorttitle = {Automatically {Evading} {Classifiers}},
	url = {https://www.ndss-symposium.org/wp-content/uploads/sites/25/2017/09/automatically-evading-classifiers.pdf},
	doi = {10.14722/ndss.2016.23115},
	abstract = {Machine learning is widely used to develop classifiers for security tasks. However, the robustness of these methods against motivated adversaries is uncertain. In this work, we propose a generic method to evaluate the robustness of classifiers under attack. The key idea is to stochastically manipulate a malicious sample to find a variant that preserves the malicious behavior but is classified as benign by the classifier. We present a general approach to search for evasive variants and report on results from experiments using our techniques against two PDF malware classifiers, PDFrate and Hidost. Our method is able to automatically find evasive variants for both classifiers for all of the 500 malicious seeds in our study. Our results suggest a general method for evaluating classifiers used in security applications, and raise serious doubts about the effectiveness of classifiers based on superficial features in the presence of adversaries.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings 2016 {Network} and {Distributed} {System} {Security} {Symposium}},
	publisher = {Internet Society},
	author = {Xu, Weilin and Qi, Yanjun and Evans, David},
	year = {2016},
	file = {Xu et al. - 2016 - Automatically Evading Classifiers A Case Study on.pdf:/home/fmind/Documents/Zotero/storage/S9QDXJWW/Xu et al. - 2016 - Automatically Evading Classifiers A Case Study on.pdf:application/pdf}
}

% Conference paper. NOTE(review): booktitle was derived from the DOI/ISBN (ACM CIKM 2014) -- confirm against the ACM Digital Library.
@inproceedings{wang_rebuilding_2014,
	title = {Rebuilding the {Tower} of {Babel}: {Towards} {Cross}-{System} {Malware} {Information} {Sharing}},
	isbn = {978-1-4503-2598-1},
	shorttitle = {Rebuilding the {Tower} of {Babel}},
	url = {http://dl.acm.org/citation.cfm?doid=2661829.2662086},
	doi = {10.1145/2661829.2662086},
	abstract = {Anti-virus systems developed by different vendors often demonstrate strong discrepancies in how they name malware, which significantly hinders malware information sharing. While existing work has proposed a plethora of malware naming standards, most antivirus vendors were reluctant to change their own naming conventions. In this paper we explore a new, more pragmatic alternative. We propose to exploit the correlation between malware naming of different anti-virus systems to create their consensus classification, through which these systems can share malware information without modifying their naming conventions. Specifically we present Latin, a novel classification integration framework leveraging the correspondence between participating anti-virus systems as reflected in heterogeneous information sources at instance-instance, instance-name, and name-name levels. We provide results from extensive experimental studies using real malware datasets and concrete use cases to verify the efficacy of Latin in supporting cross-system malware information sharing.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 23rd {ACM} {International} {Conference} on {Information} and {Knowledge} {Management} ({CIKM} '14)},
	publisher = {ACM Press},
	author = {Wang, Ting and Meng, Shicong and Gao, Wei and Hu, Xin},
	year = {2014},
	keywords = {vt-labels},
	pages = {1239--1248},
	file = {Wang et al. - 2014 - Rebuilding the Tower of Babel Towards Cross-Syste.pdf:/home/fmind/Documents/Zotero/storage/ZRH47BY7/Wang et al. - 2014 - Rebuilding the Tower of Babel Towards Cross-Syste.pdf:application/pdf}
}

% Technical note, not a journal article. NOTE(review): institution and report number were not part of the original export -- confirm against the SEI digital library. The pages field holds the page count of the PDF.
@techreport{mundie_mal:_2013,
	title = {The {MAL}: {A} {Malware} {Analysis} {Lexicon}},
	language = {en},
	type = {Technical {Note}},
	number = {CMU/SEI-2013-TN-010},
	institution = {Software Engineering Institute, Carnegie Mellon University},
	author = {Mundie, David A. and McIntire, David M.},
	month = feb,
	year = {2013},
	keywords = {vt-labels},
	pages = {47},
	file = {Mundie and McIntire - The MAL A Malware Analysis Lexicon.pdf:/home/fmind/Documents/Zotero/storage/TCHDFXL5/Mundie and McIntire - The MAL A Malware Analysis Lexicon.pdf:application/pdf}
}

% Chapter in a Springer proceedings volume. The editor field names the volume editor only; the export had prepended the whole series editorial board. NOTE(review): series name inferred from the volume number -- confirm on SpringerLink.
@incollection{hutchison_av-meter:_2014,
	address = {Cham},
	title = {{AV}-{Meter}: {An} {Evaluation} of {Antivirus} {Scans} and {Labels}},
	volume = {8550},
	series = {Lecture {Notes} in {Computer} {Science}},
	isbn = {978-3-319-08508-1 978-3-319-08509-8},
	shorttitle = {{AV}-{Meter}},
	url = {http://link.springer.com/10.1007/978-3-319-08509-8_7},
	abstract = {Antivirus scanners are designed to detect malware and, to a lesser extent, to label detections based on a family association. The labeling provided by AV vendors has many applications such as guiding efforts of disinfection and countermeasures, intelligence gathering, and attack attribution, among others. Furthermore, researchers rely on AV labels to establish a baseline of ground truth to compare their detection and classification algorithms. This is done despite many papers pointing out the subtle problem of relying on AV labels. However, the literature lacks any systematic study on validating the performance of antivirus scanners, and the reliability of those labels or detection.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Detection of {Intrusions} and {Malware}, and {Vulnerability} {Assessment}},
	publisher = {Springer International Publishing},
	author = {Mohaisen, Aziz and Alrawi, Omar},
	editor = {Dietrich, Sven},
	year = {2014},
	doi = {10.1007/978-3-319-08509-8_7},
	keywords = {vt-analysis},
	pages = {112--131},
	file = {Mohaisen and Alrawi - 2014 - AV-Meter An Evaluation of Antivirus Scans and Lab.pdf:/home/fmind/Documents/Zotero/storage/VEBKWFKA/Mohaisen and Alrawi - 2014 - AV-Meter An Evaluation of Antivirus Scans and Lab.pdf:application/pdf}
}

% Conference paper. NOTE(review): venue and year were not part of the original export and follow how this paper is usually cited -- confirm. The pages field holds the page count of the PDF.
@inproceedings{marx_wildlist_nodate,
	title = {The {WildList} {Is} {Dead}, {Long} {Live} the {WildList}!},
	abstract = {For a very long time, the WildList was the accepted standard for all kinds of anti-malware software test. However, today’s real challenges – like targeted attacks and zero-day exploits, as well as adware and spyware – are not covered by the WildList. Traditionally, the WildList has only focused on self-replicating malware such as viruses and worms, but in today’s world these malware types have almost died out and been replaced by Trojan horses with keyloggers and options to steal PIN and TAN codes for online banking. (The malware world has gone commercial and some of the bad guys are making more money than traditional AV companies!) Besides this, the WildList is usually published two to three months after the reporting period, so it is outdated when released. This paper will focus on the current problems with the WildList and suggest methods to increase its usefulness again – to ensure not only that all of today’s malware types are covered, but also that the WildList will always be current when published on a more regular basis. This includes an analysis of all required processes, better reporting methods and automation techniques which must be used to avoid delays in publication.},
	language = {en},
	booktitle = {Virus {Bulletin} {Conference}},
	author = {Marx, Andreas and Dessmann, Frank},
	year = {2007},
	keywords = {vt-labels},
	pages = {11},
	file = {Marx and Dessmann - THE WILDLIST IS DEAD, LONG LIVE THE WILDLIST!.pdf:/home/fmind/Documents/Zotero/storage/MGVMSHB7/Marx and Dessmann - THE WILDLIST IS DEAD, LONG LIVE THE WILDLIST!.pdf:application/pdf}
}

% Chapter in the edited Springer volume "Information Systems Security" (vol. 7093).
@incollection{jajodia_finding_2011,
  author    = {Maggi, Federico and Bellini, Andrea and Salvaneschi, Guido and Zanero, Stefano},
  editor    = {Jajodia, Sushil and Mazumdar, Chandan},
  title     = {Finding {Non}-trivial {Malware} {Naming} {Inconsistencies}},
  booktitle = {Information {Systems} {Security}},
  volume    = {7093},
  pages     = {144--159},
  year      = {2011},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-642-25559-5 978-3-642-25560-1},
  doi       = {10.1007/978-3-642-25560-1_10},
  url       = {http://link.springer.com/10.1007/978-3-642-25560-1_10},
  urldate   = {2018-04-10},
  language  = {en},
  keywords  = {vt-labels},
  abstract  = {Malware analysts, and in particular antivirus vendors, never agreed on a single naming convention for malware specimens. This leads to confusion and difﬁculty in comparing coverage of different antivirus engines, and generally causes issues, in particular for researchers, when integrating and systematizing known threats, or comparing the outcome of different detectors. Given the observation that solving naming inconsistencies is almost an utopia—because it would require all the vendors to agree on a single naming convention—in this paper we take a step back and concentrate on the problem of ﬁnding inconsistencies. Solving inconsistencies is indeed impossible without knowing exactly where they are.},
  file      = {Maggi et al. - 2011 - Finding Non-trivial Malware Naming Inconsistencies.pdf:/home/fmind/Documents/Zotero/storage/MC8YTY4I/Maggi et al. - 2011 - Finding Non-trivial Malware Naming Inconsistencies.pdf:application/pdf}
}

% Journal article; ISSN written in the hyphenated NNNN-NNNN form used by other entries in this file.
@article{kelchner_consistent_2010,
	title = {The (in)consistent naming of malcode},
	volume = {2010},
	issn = {1361-3723},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S1361372310700075},
	doi = {10.1016/S1361-3723(10)70007-5},
	language = {en},
	number = {2},
	urldate = {2018-04-10},
	journal = {Computer Fraud \& Security},
	author = {Kelchner, Tom},
	month = feb,
	year = {2010},
	keywords = {vt-labels},
	pages = {5--7},
	file = {Kelchner - 2010 - The (in)consistent naming of malcode.pdf:/home/fmind/Documents/Zotero/storage/24R7ZJ3B/Kelchner - 2010 - The (in)consistent naming of malcode.pdf:application/pdf}
}

% Workshop paper. NOTE(review): booktitle was derived from the DOI/ISBN (ACM AISec 2015) -- confirm against the ACM Digital Library.
@inproceedings{kantchelian_better_2015,
	title = {Better {Malware} {Ground} {Truth}: {Techniques} for {Weighting} {Anti}-{Virus} {Vendor} {Labels}},
	isbn = {978-1-4503-3826-4},
	shorttitle = {Better {Malware} {Ground} {Truth}},
	url = {http://dl.acm.org/citation.cfm?doid=2808769.2808780},
	doi = {10.1145/2808769.2808780},
	abstract = {We examine the problem of aggregating the results of multiple anti-virus (AV) vendors’ detectors into a single authoritative ground-truth label for every binary. To do so, we adapt a well-known generative Bayesian model that postulates the existence of a hidden ground truth upon which the AV labels depend. We use training based on Expectation Maximization for this fully unsupervised technique. We evaluate our method using 279,327 distinct binaries from VirusTotal, each of which appeared for the first time between January 2012 and June 2014.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 8th {ACM} {Workshop} on {Artificial} {Intelligence} and {Security} ({AISec} '15)},
	publisher = {ACM Press},
	author = {Kantchelian, Alex and Tschantz, Michael Carl and Afroz, Sadia and Miller, Brad and Shankar, Vaishaal and Bachwani, Rekha and Joseph, Anthony D. and Tygar, J. D.},
	year = {2015},
	keywords = {vt-analysis, vt-labels},
	pages = {45--56},
	file = {Kantchelian et al. - 2015 - Better Malware Ground Truth Techniques for Weight.pdf:/home/fmind/Documents/Zotero/storage/HFAS422Z/Kantchelian et al. - 2015 - Better Malware Ground Truth Techniques for Weight.pdf:application/pdf}
}

% Journal article. The particle "de" is kept with the surname (von-part) so abbreviating styles do not turn it into an initial.
@article{gregio_toward_2015,
	title = {Toward a {Taxonomy} of {Malware} {Behaviors}},
	volume = {58},
	issn = {0010-4620, 1460-2067},
	url = {https://academic.oup.com/comjnl/article-lookup/doi/10.1093/comjnl/bxv047},
	doi = {10.1093/comjnl/bxv047},
	language = {en},
	number = {10},
	urldate = {2018-04-10},
	journal = {The Computer Journal},
	author = {Grégio, André Ricardo Abed and Afonso, Vitor Monte and Filho, Dario Simões Fernandes and de Geus, Paulo Lício and Jino, Mario},
	month = oct,
	year = {2015},
	keywords = {vt-labels},
	pages = {2758--2777},
	file = {Grégio et al. - 2015 - Toward a Taxonomy of Malware Behaviors.pdf:/home/fmind/Documents/Zotero/storage/TNGTEHDE/Grégio et al. - 2015 - Toward a Taxonomy of Malware Behaviors.pdf:application/pdf}
}

% White paper, not a journal article. NOTE(review): institution and year were not part of the original export and follow how this paper is usually cited -- confirm. The pages field holds the page count of the PDF.
@techreport{gordon_virus_nodate,
	title = {Virus and {Vulnerability} {Classification} {Schemes}: {Standards} and {Integration}},
	language = {en},
	type = {White {Paper}},
	institution = {Symantec Security Response},
	author = {Gordon, Sarah},
	year = {2003},
	pages = {16},
	file = {Gordon - Virus and Vulnerability Classification Schemes St.pdf:/home/fmind/Documents/Zotero/storage/5TTUJGS7/Gordon - Virus and Vulnerability Classification Schemes St.pdf:application/pdf}
}

% Conference paper; the conference name belongs in booktitle rather than journal. NOTE(review): the volume value is carried over unchanged from the export -- confirm it is meaningful.
@inproceedings{bureau_dose_2008,
	title = {A dose by any other name},
	volume = {8},
	abstract = {Years ago, when alt.comp.virus was still useful, ‘Name that virus’ was a popular virtual party game, and virus names were, if not standardized, at least easy to cross-reference with tools like VGrep [1]. In 2008, the numbers have escalated exponentially, analysis and detection have become increasingly generic, and naming, even for some WildList malware, has become nearly useless because of the difficulty of mapping samples to names. The CME (Common Malware Enumeration) initiative [2], while attempting to achieve something many people wanted, seems to have foundered on the rocks of the reality. Yet we continue to provide ‘top ten’ threat lists that have virtually no commonality or consistency across different vendors and sites, so that our customers continue to ask whether we detect the media virus du jour, and the slashdotty community point to us and giggle at our incompetence in failing to provide information about what we detect. Are all our solutions going generic? Are there ways to resolve this issue so that our customers can understand what’s happening and regain some faith in the industry without being hung up on the question ‘Do you detect virus X?’ We think so, and will discuss some possible approaches in this paper.},
	language = {en},
	booktitle = {Virus {Bulletin} {Conference}},
	author = {Bureau, Pierre-Marc and Harley, David},
	year = {2008},
	keywords = {vt-labels},
	pages = {224--231},
	file = {Diego - Pierre-Marc Bureau, David Harley.pdf:/home/fmind/Documents/Zotero/storage/F9UIRCAD/Diego - Pierre-Marc Bureau, David Harley.pdf:application/pdf}
}

% Conference paper. The honorific has been removed from the author name. NOTE(review): venue was not part of the original export -- confirm. The pages field holds the page count of the PDF.
@inproceedings{bontchev_current_2005,
	title = {Current {Status} of the {CARO} {Malware} {Naming} {Scheme}},
	abstract = {The CARO malware naming scheme was created more than 15 years ago. To this date, it remains the naming scheme that is the most widely used in anti–virus products—despite being criticized left and right and the fact that no product has absolute compliance with it. One frequent criticism is that detailed documentation of the up–to–date status of the Scheme is difficult to find and that this hampers the Scheme’s popularity. This paper attempts to solve this problem. It documents the CARO malware naming scheme completely, including the recently introduced changes. It will be made freely available on the Web and will be continuously updated as new changes are introduced. Its purpose is to serve as an easily and publicly accessible documentation of the latest state of the CARO Malware Naming Scheme.},
	language = {en},
	booktitle = {Virus {Bulletin} {Conference}},
	author = {Bontchev, Vesselin},
	month = oct,
	year = {2005},
	keywords = {vt-labels},
	pages = {29},
	file = {Bontchev - Current Status of the CARO Malware Naming Scheme.pdf:/home/fmind/Documents/Zotero/storage/GFZ3TUIF/Bontchev - Current Status of the CARO Malware Naming Scheme.pdf:application/pdf}
}

% Conference paper. The affiliation "UC Santa Barbara" had been parsed as a twelfth author and is removed. NOTE(review): booktitle, publisher and page range were not part of the original export (which held a page count of 20) -- confirm against IEEE Xplore.
@inproceedings{shoshitaishvili_state_2016,
	title = {({State} of) {The} {Art} of {War}: {Offensive} {Techniques} in {Binary} {Analysis}},
	abstract = {Finding and exploiting vulnerabilities in binary code is a challenging task. The lack of high-level, semantically rich information about data structures and control constructs makes the analysis of program properties harder to scale. However, the importance of binary analysis is on the rise. In many situations binary analysis is the only possible way to prove (or disprove) properties about the code that is actually executed. In this paper, we present a binary analysis framework that implements a number of analysis techniques that have been proposed in the past. We present a systematized implementation of these techniques, which allows other researchers to compose them and develop new approaches. In addition, the implementation of these techniques in a unifying framework allows for the direct comparison of these approaches and the identification of their advantages and disadvantages. The evaluation included in this paper is performed using a recent dataset created by DARPA for evaluating the effectiveness of binary vulnerability analysis techniques.},
	language = {en},
	booktitle = {2016 {IEEE} {Symposium} on {Security} and {Privacy} ({SP})},
	publisher = {IEEE},
	author = {Shoshitaishvili, Yan and Wang, Ruoyu and Salls, Christopher and Stephens, Nick and Polino, Mario and Dutcher, Andrew and Grosen, John and Feng, Siji and Hauser, Christophe and Kruegel, Christopher and Vigna, Giovanni},
	year = {2016},
	pages = {138--157},
	file = {Shoshitaishvili et al. - (State of) The Art of War Offensive Techniques in.pdf:/home/fmind/Documents/Zotero/storage/ADQQDVRQ/Shoshitaishvili et al. - (State of) The Art of War Offensive Techniques in.pdf:application/pdf}
}

% Conference paper. NOTE(review): booktitle was derived from the DOI/ISBN (ACM CCS 2013) -- confirm against the ACM Digital Library.
@inproceedings{lalonde_levesque_clinical_2013,
	title = {A clinical study of risk factors related to malware infections},
	isbn = {978-1-4503-2477-9},
	url = {http://dl.acm.org/citation.cfm?doid=2508859.2516747},
	doi = {10.1145/2508859.2516747},
	abstract = {The success of malicious software (malware) depends upon both technical and human factors. The most security conscious users are vulnerable to zero-day exploits; the best security mechanisms can be circumvented by poor user choices. While there has been significant research addressing the technical aspects of malware attack and defense, there has been much less research reporting on how human behavior interacts with both malware and current malware defenses. In this paper we describe a proof-of-concept field study designed to examine the interactions between users, antivirus (anti-malware) software, and malware as they occur on deployed systems. The 4-month study, conducted in a fashion similar to the clinical trials used to evaluate medical interventions, involved 50 subjects whose laptops were instrumented to monitor possible infections and gather data on user behavior. Although the population size was limited, this initial study produced some intriguing, non-intuitive insights into the efficacy of current defenses, particularly with regards to the technical sophistication of end users. We assert that this work shows the feasibility and utility of testing security software through long-term field studies with greater ecological validity than can be achieved through other means.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2013 {ACM} {SIGSAC} {Conference} on {Computer} \& {Communications} {Security} ({CCS} '13)},
	publisher = {ACM Press},
	author = {Lalonde Levesque, Fanny and Nsiempba, Jude and Fernandez, José M. and Chiasson, Sonia and Somayaji, Anil},
	year = {2013},
	keywords = {study},
	pages = {97--108},
	file = {Lalonde Levesque et al. - 2013 - A clinical study of risk factors related to malwar.pdf:/home/fmind/Documents/Zotero/storage/TR4PLBN2/Lalonde Levesque et al. - 2013 - A clinical study of risk factors related to malwar.pdf:application/pdf}
}

% Conference paper. The footnote marker has been removed from the title and the compound surname "Dalla Preda" is kept together. NOTE(review): booktitle and page range were not part of the original export (which held a page count of 12) -- confirm against the ACM Digital Library.
@inproceedings{preda_semantics-based_2007,
	title = {A {Semantics}-{Based} {Approach} to {Malware} {Detection}},
	abstract = {Malware detection is a crucial aspect of software security. Current malware detectors work by checking for “signatures,” which attempt to capture (syntactic) characteristics of the machine-level byte sequence of the malware. This reliance on a syntactic approach makes such detectors vulnerable to code obfuscations, increasingly used by malware writers, that alter syntactic properties of the malware byte sequence without significantly affecting their execution behavior.},
	language = {en},
	booktitle = {Proceedings of the 34th {Annual} {ACM} {SIGPLAN}-{SIGACT} {Symposium} on {Principles} of {Programming} {Languages} ({POPL} '07)},
	author = {Dalla Preda, Mila and Christodorescu, Mihai and Jha, Somesh and Debray, Saumya},
	year = {2007},
	keywords = {detection, obfuscation},
	pages = {377--388},
	file = {Preda et al. - A Semantics-Based Approach to Malware Detection ∗.pdf:/home/fmind/Documents/Zotero/storage/6ITR2DD4/Preda et al. - A Semantics-Based Approach to Malware Detection ∗.pdf:application/pdf}
}

% Chapter in the edited Springer volume "Recent Advances in Intrusion Detection" (vol. 5230).
@incollection{lippmann_study_2008,
  author    = {Guo, Fanglu and Ferrie, Peter and Chiueh, Tzi-cker},
  editor    = {Lippmann, Richard and Kirda, Engin and Trachtenberg, Ari},
  title     = {A {Study} of the {Packer} {Problem} and {Its} {Solutions}},
  booktitle = {Recent {Advances} in {Intrusion} {Detection}},
  volume    = {5230},
  pages     = {98--115},
  year      = {2008},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-87402-7 978-3-540-87403-4},
  doi       = {10.1007/978-3-540-87403-4_6},
  url       = {http://link.springer.com/10.1007/978-3-540-87403-4_6},
  urldate   = {2018-04-10},
  language  = {en},
  keywords  = {obfuscation},
  abstract  = {An increasing percentage of malware programs distributed in the wild are packed by packers, which are programs that transform an input binary’s appearance without aﬀecting its execution semantics, to create new malware variants that can evade signature-based malware detection tools. This paper reports the results of a comprehensive study of the extent of the packer problem based on data collected at Symantec and the eﬀectiveness of existing solutions to this problem. Then the paper presents a generic unpacking solution called Justin (Just-In-Time AV scanning), which is designed to detect the end of unpacking of a packed binary’s run and invoke AV scanning against the process image at that time. For accurate end-to-unpacking detection, Justin incorporates the following heuristics: Dirty Page Execution, Unpacker Memory Avoidance, Stack Pointer Check and Command-Line Argument Access. Empirical testing shows that when compared with SymPack, which contains a set of manually created unpackers for a collection of selective packers, Justin’s eﬀectiveness is comparable to SymPack for those binaries packed by these supported packers, and is much better than SymPack for binaries packed by those that SymPack does not support.},
  file      = {Guo et al. - 2008 - A Study of the Packer Problem and Its Solutions.pdf:/home/fmind/Documents/Zotero/storage/WJRSFHTZ/Guo et al. - 2008 - A Study of the Packer Problem and Its Solutions.pdf:application/pdf}
}

% Journal article. Author names are re-split so middle initials sit with the given name instead of the surname.
@article{a.saeed_survey_2013,
	title = {A {Survey} on {Malware} and {Malware} {Detection} {Systems}},
	volume = {67},
	issn = {0975-8887},
	url = {http://research.ijcaonline.org/volume67/number16/pxc3887108.pdf},
	doi = {10.5120/11480-7108},
	abstract = {Over the last decades, there were lots of studies made on malware and their countermeasures. The most recent reports emphasize that the invention of malicious software is rapidly increasing. Moreover, the intensive use of networks and Internet increases the ability of the spreading and the effectiveness of this kind of software. On the other hand, researchers and manufacturers making great efforts to produce anti-malware systems with effective detection methods for better protection on computers. In this paper, a detailed review has been conducted on the current situation of malware infection and the work done to improve anti-malware or malware detection systems. Thus, it provides an up-to-date comparative reference for developers of malware detection systems.},
	language = {en},
	number = {16},
	urldate = {2018-04-10},
	journal = {International Journal of Computer Applications},
	author = {Saeed, Imtithal A. and Selamat, Ali and Abuagoub, Ali M. A.},
	month = apr,
	year = {2013},
	keywords = {survey},
	pages = {25--31},
	file = {A.Saeed et al. - 2013 - A Survey on Malware and Malware Detection Systems.pdf:/home/fmind/Documents/Zotero/storage/BTQ6ME83/A.Saeed et al. - 2013 - A Survey on Malware and Malware Detection Systems.pdf:application/pdf}
}

% Conference paper. The title superscript is typeset in math mode to agree with the shorttitle. NOTE(review): booktitle was derived from the DOI prefix (10.1109/BigDataSecurity-HPSC-IDS.2016) -- confirm against IEEE Xplore.
@inproceedings{veeramachaneni_ai2:_2016,
	title = {{AI}{$^2$}: {Training} a {Big} {Data} {Machine} to {Defend}},
	isbn = {978-1-5090-2403-2},
	shorttitle = {{AI}{\textasciicircum}2},
	url = {http://ieeexplore.ieee.org/document/7502263/},
	doi = {10.1109/BigDataSecurity-HPSC-IDS.2016.79},
	abstract = {We present an analyst-in-the-loop security system, where analyst intuition is put together with state-of-the-art machine learning to build an end-to-end active learning system. The system has four key features: a big data behavioral analytics platform, an ensemble of outlier detection methods, a mechanism to obtain feedback from security analysts, and a supervised learning module. When these four components are run in conjunction on a daily basis and are compared to an unsupervised outlier detection method, detection rate improves by an average of 3.41×, and false positives are reduced fivefold. We validate our system with a real-world data set consisting of 3.6 billion log lines. These results show that our system is capable of learning to defend against unseen attacks.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2016 {IEEE} 2nd {International} {Conference} on {Big} {Data} {Security} on {Cloud} ({BigDataSecurity}), {IEEE} {International} {Conference} on {High} {Performance} and {Smart} {Computing} ({HPSC}), and {IEEE} {International} {Conference} on {Intelligent} {Data} and {Security} ({IDS})},
	publisher = {IEEE},
	author = {Veeramachaneni, Kalyan and Arnaldo, Ignacio and Korrapati, Vamsi and Bassias, Constantinos and Li, Ke},
	month = apr,
	year = {2016},
	keywords = {detection},
	pages = {49--54},
	file = {Veeramachaneni et al. - 2016 - AI^2 Training a Big Data Machine to Defend.pdf:/home/fmind/Documents/Zotero/storage/LSZRKCD6/Veeramachaneni et al. - 2016 - AI^2 Training a Big Data Machine to Defend.pdf:application/pdf}
}

% Conference paper. NOTE(review): booktitle was derived from the DOI/ISBN (HotSoS 2015) -- confirm against the ACM Digital Library.
@inproceedings{mezzour_empirical_2015,
	title = {An empirical study of global malware encounters},
	isbn = {978-1-4503-3376-4},
	url = {http://dl.acm.org/citation.cfm?doid=2746194.2746202},
	doi = {10.1145/2746194.2746202},
	abstract = {The number of trojans, worms, and viruses that computers encounter varies greatly across countries. Empirically identifying factors behind such variation can provide a scientific empirical basis to policy actions to reduce malware encounters in the most affected countries. However, our understanding of these factors is currently mainly based on expert opinions, not empirical evidence.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2015 {Symposium} and {Bootcamp} on the {Science} of {Security} ({HotSoS} '15)},
	publisher = {ACM Press},
	author = {Mezzour, Ghita and Carley, Kathleen M. and Carley, L. Richard},
	year = {2015},
	keywords = {study},
	pages = {1--11},
	file = {Mezzour et al. - 2015 - An empirical study of global malware encounters.pdf:/home/fmind/Documents/Zotero/storage/72CBLED2/Mezzour et al. - 2015 - An empirical study of global malware encounters.pdf:application/pdf}
}

% Journal article in the Journal in Computer Virology, volume 4, issue 4.
@article{ye_intelligent_2008,
  author   = {Ye, Yanfang and Wang, Dingding and Li, Tao and Ye, Dongyi and Jiang, Qingshan},
  title    = {An intelligent {PE}-malware detection system based on association mining},
  journal  = {Journal in Computer Virology},
  volume   = {4},
  number   = {4},
  pages    = {323--334},
  month    = nov,
  year     = {2008},
  issn     = {1772-9890, 1772-9904},
  doi      = {10.1007/s11416-008-0082-4},
  url      = {http://link.springer.com/10.1007/s11416-008-0082-4},
  urldate  = {2018-04-10},
  language = {en},
  keywords = {detection},
  file     = {Ye et al. - 2008 - An intelligent PE-malware detection system based o.pdf:/home/fmind/Documents/Zotero/storage/35PEGZGW/Ye et al. - 2008 - An intelligent PE-malware detection system based o.pdf:application/pdf}
}

% Chapter in the edited Springer volume "Recent Advances in Intrusion Detection" (vol. 4637).
@incollection{kruegel_automated_2007,
  author    = {Bailey, Michael and Oberheide, Jon and Andersen, Jon and Mao, Z. Morley and Jahanian, Farnam and Nazario, Jose},
  editor    = {Kruegel, Christopher and Lippmann, Richard and Clark, Andrew},
  title     = {Automated {Classification} and {Analysis} of {Internet} {Malware}},
  booktitle = {Recent {Advances} in {Intrusion} {Detection}},
  volume    = {4637},
  pages     = {178--197},
  year      = {2007},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-74319-4 978-3-540-74320-0},
  doi       = {10.1007/978-3-540-74320-0_10},
  url       = {http://link.springer.com/10.1007/978-3-540-74320-0_10},
  urldate   = {2018-04-10},
  language  = {en},
  keywords  = {classification, vt-features, vt-labels},
  abstract  = {Numerous attacks, such as worms, phishing, and botnets, threaten the availability of the Internet, the integrity of its hosts, and the privacy of its users. A core element of defense against these attacks is anti-virus (AV) software—a service that detects, removes, and characterizes these threats. The ability of these products to successfully characterize these threats has far-reaching eﬀects—from facilitating sharing across organizations, to detecting the emergence of new threats, and assessing risk in quarantine and cleanup. In this paper, we examine the ability of existing host-based anti-virus products to provide semantically meaningful information about the malicious software and tools (or malware) used by attackers. Using a large, recent collection of malware that spans a variety of attack vectors (e.g., spyware, worms, spam), we show that diﬀerent AV products characterize malware in ways that are inconsistent across AV products, incomplete across malware, and that fail to be concise in their semantics. To address these limitations, we propose a new classiﬁcation technique that describes malware behavior in terms of system state changes (e.g., ﬁles written, processes created) rather than in sequences or patterns of system calls. To address the sheer volume of malware and diversity of its behavior, we provide a method for automatically categorizing these proﬁles of malware into groups that reﬂect similar classes of behaviors and demonstrate how behavior-based clustering provides a more direct and eﬀective way of classifying and analyzing Internet malware.},
  file      = {Bailey et al. - 2007 - Automated Classification and Analysis of Internet .pdf:/home/fmind/Documents/Zotero/storage/DG9EFZ2H/Bailey et al. - 2007 - Automated Classification and Analysis of Internet .pdf:application/pdf}
}

@article{rieck_automatic_2011,
	title = {Automatic analysis of malware behavior using machine learning},
	volume = {19},
	issn = {1875-8924, 0926-227X},
	url = {http://www.medra.org/servlet/aliasResolver?alias=iospress&doi=10.3233/JCS-2010-0410},
	doi = {10.3233/JCS-2010-0410},
	abstract = {Malicious software—so called malware—poses a major threat to the security of computer systems. The amount and diversity of its variants render classic security defenses ineffective, such that millions of hosts in the Internet are infected with malware in the form of computer viruses, Internet worms and Trojan horses. While obfuscation and polymorphism employed by malware largely impede detection at file level, the dynamic analysis of malware binaries during run-time provides an instrument for characterizing and defending against the threat of malicious software.},
	language = {en},
	number = {4},
	urldate = {2018-04-10},
	journal = {Journal of Computer Security},
	author = {Rieck, Konrad and Trinius, Philipp and Willems, Carsten and Holz, Thorsten},
	month = jun,
	year = {2011},
	keywords = {classification, dynamic},
	pages = {639--668},
	file = {Rieck et al. - 2011 - Automatic analysis of malware behavior using machi.pdf:/home/fmind/Documents/Zotero/storage/ZNVJ2ZZG/Rieck et al. - 2011 - Automatic analysis of malware behavior using machi.pdf:application/pdf}
}

% NOTE(review): the original entry had no journal and a page count in the pages field; retyped as a PhD thesis. Confirm the school (believed to be The University of Arizona).
@phdthesis{yadegari_automatic_2016,
	title = {Automatic {Deobfuscation} and {Reverse} {Engineering} of {Obfuscated} {Code}},
	language = {en},
	school = {The University of Arizona},
	author = {Yadegari, Babak},
	year = {2016},
	keywords = {reverse, obfuscation},
	pagetotal = {201},
	file = {Yadegari - AUTOMATIC DEOBFUSCATION AND REVERSE ENGINEERING OF.pdf:/home/fmind/Documents/Zotero/storage/MPN85RTP/Yadegari - AUTOMATIC DEOBFUSCATION AND REVERSE ENGINEERING OF.pdf:application/pdf}
}

% NOTE(review): booktitle inferred from the ACM DOI/ISBN (KDD 2010) - confirm. The pages field still holds only the first page.
@inproceedings{ye_automatic_2010,
	title = {Automatic malware categorization using cluster ensemble},
	isbn = {978-1-4503-0055-1},
	url = {http://dl.acm.org/citation.cfm?doid=1835804.1835820},
	doi = {10.1145/1835804.1835820},
	abstract = {Malware categorization is an important problem in malware analysis and has attracted a lot of attention of computer security researchers and anti-malware industry recently. Today’s malware samples are created at a rate of millions per day with the development of malware writing techniques. There is thus an urgent need of effective methods for automatic malware categorization. Over the last few years, many clustering techniques have been employed for automatic malware categorization. However, such techniques have isolated successes with limited effectiveness and efficiency, and few have been applied in real anti-malware industry.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 16th {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
	publisher = {ACM Press},
	author = {Ye, Yanfang and Li, Tao and Chen, Yong and Jiang, Qingshan},
	year = {2010},
	keywords = {classification},
	pages = {95},
	file = {Ye et al. - 2010 - Automatic malware categorization using cluster ens.pdf:/home/fmind/Documents/Zotero/storage/AP4CDQHP/Ye et al. - 2010 - Automatic malware categorization using cluster ens.pdf:application/pdf}
}

% NOTE(review): no journal was recorded, so this is typed as misc; presumably an arXiv preprint (Oct 2015) - add eprint fields once confirmed. The affiliation "UC Berkeley" had been imported as an author and is removed.
@misc{miller_back_2015,
	title = {Back to the {Future}: {Malware} {Detection} with {Temporally} {Consistent} {Labels}},
	abstract = {The malware detection arms race involves constant change: malware changes to evade detection and labels change as detection mechanisms react. Recognizing that malware changes over time, prior work has enforced temporally consistent samples by requiring that training binaries predate evaluation binaries. We present temporally consistent labels, requiring that training labels also predate evaluation binaries since training labels collected after evaluation binaries constitute label knowledge from the future. Using a dataset containing 1.1 million binaries from over 2.5 years, we show that enforcing temporal label consistency decreases detection from 91\% to 72\% at a 0.5\% false positive rate compared to temporal samples alone.},
	language = {en},
	author = {Miller, Brad and Kantchelian, Alex and Tschantz, Michael Carl and Afroz, Sadia and Bachwani, Rekha and Faizullabhoy, Riyaz and Huang, Ling and Shankar, Vaishaal and Wu, Tony and Yiu, George and Joseph, Anthony D. and Tygar, J. D.},
	month = oct,
	year = {2015},
	keywords = {detection, evolution},
	pagetotal = {12},
	file = {Miller et al. - Back to the Future Malware Detection with Tempora.pdf:/home/fmind/Documents/Zotero/storage/P36Y9LMM/Miller et al. - Back to the Future Malware Detection with Tempora.pdf:application/pdf}
}

% NOTE(review): series name inferred from the Springer volume number (CCIS 196) - confirm.
@incollection{wyld_behavioral_2011,
	address = {Berlin, Heidelberg},
	title = {Behavioral {Malware} {Detection} {Expert} {System} – {Tarantula}},
	volume = {196},
	isbn = {978-3-642-22539-0 978-3-642-22540-6},
	url = {http://link.springer.com/10.1007/978-3-642-22540-6_7},
	abstract = {The number of new malware samples and their complexity is increasing rapidly because of which protecting the system with signature based detection has become increasingly challenging task. In this work we present a novel behaviour-based malware detection expert system named tarantula which makes use of suspicious behaviour rules to detect malicious activity on the system. In our research, we observed that malware targets critical system resources such as system files and registry of operating system in order to execute; shield itself and propagate to other hosts. We identified the critical system resources such as system files and registry in Microsoft Windows and evolved suspicious behaviour rules at a granular level. These behavioural rules are enforced using monitoring and enforcement layer. Through extensive experimentation and testing, we conclude that tool has high detection rate and very less overhead and false positives. The implementation details of prototype (Tarantula) developed for Microsoft Windows XP and Vista operating systems are also provided.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Advances in {Network} {Security} and {Applications}},
	series = {Communications in {Computer} and {Information} {Science}},
	publisher = {Springer Berlin Heidelberg},
	author = {Romana, Sandeep and Phadnis, Swapnil and Pareek, Himanshu and Eswari, P. R. L.},
	editor = {Wyld, David C. and Wozniak, Michal and Chaki, Nabendu and Meghanathan, Natarajan and Nagamalai, Dhinaharan},
	year = {2011},
	doi = {10.1007/978-3-642-22540-6_7},
	keywords = {detection, cognitive},
	pages = {65--77},
	file = {Romana et al. - 2011 - Behavioral Malware Detection Expert System – Taran.pdf:/home/fmind/Documents/Zotero/storage/TZZE59FW/Romana et al. - 2011 - Behavioral Malware Detection Expert System – Taran.pdf:application/pdf}
}

% NOTE(review): booktitle inferred from the ACM DOI/ISBN (CCS 2011) - confirm. The pages field still holds only the first page.
@inproceedings{jang_bitshred:_2011,
	title = {{BitShred}: feature hashing malware for scalable triage and semantic analysis},
	isbn = {978-1-4503-0948-6},
	shorttitle = {{BitShred}},
	url = {http://dl.acm.org/citation.cfm?doid=2046707.2046742},
	doi = {10.1145/2046707.2046742},
	abstract = {The sheer volume of new malware found each day is growing at an exponential pace. This growth has created a need for automatic malware triage techniques that determine what malware is similar, what malware is unique, and why. In this paper, we present BitShred, a system for large-scale malware similarity analysis and clustering, and for automatically uncovering semantic inter- and intra-family relationships within clusters. The key idea behind BitShred is using feature hashing to dramatically reduce the high-dimensional feature spaces that are common in malware analysis. Feature hashing also allows us to mine correlated features between malware families and samples using co-clustering techniques. Our evaluation shows that BitShred speeds up typical malware triage tasks by up to 2,365x and uses up to 82x less memory on a single CPU, all with comparable accuracy to previous approaches. We also develop a parallelized version of BitShred, and demonstrate scalability within the Hadoop framework.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 18th {ACM} {Conference} on {Computer} and {Communications} {Security}},
	publisher = {ACM Press},
	author = {Jang, Jiyong and Brumley, David and Venkataraman, Shobha},
	year = {2011},
	keywords = {classification, indexing, vt-features},
	pages = {309},
	file = {Jang et al. - 2011 - BitShred feature hashing malware for scalable tri.pdf:/home/fmind/Documents/Zotero/storage/BM9JJ8AF/Jang et al. - 2011 - BitShred feature hashing malware for scalable tri.pdf:application/pdf}
}

@inproceedings{nix_classification_2017,
	title = {Classification of {Android} apps and malware using deep neural networks},
	isbn = {978-1-5090-6182-2},
	url = {http://ieeexplore.ieee.org/document/7966078/},
	doi = {10.1109/IJCNN.2017.7966078},
	abstract = {Malware targeting mobile devices is a pervasive problem in modern life. The detection of malware is essentially a software classification problem based on information gathered from program analysis. We focus on classification of Android applications using system API-call sequences and investigate the effectiveness of Deep Neural Networks (DNNs) for such purpose. The ability of DNNs to learn complex and flexible features may lead to timely and effective detection of malware. We design a Convolutional Neural Network (CNN) for sequence classification and conduct a set of experiments on malware detection and categorization of software into functionality groups to test and compare our CNN with classifications by recurrent neural network (LSTM) and other n-gram based methods. Both CNN and LSTM significantly outperformed n-gram based methods. Surprisingly, the performance of our CNN is also much better than that of the LSTM, which is considered a natural choice for sequential data.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2017 {International} {Joint} {Conference} on {Neural} {Networks} ({IJCNN})},
	publisher = {IEEE},
	author = {Nix, Robin and Zhang, Jian},
	month = may,
	year = {2017},
	keywords = {classification},
	pages = {1871--1878},
	file = {Nix and Zhang - 2017 - Classification of Android apps and malware using d.pdf:/home/fmind/Documents/Zotero/storage/L6WFAFB9/Nix and Zhang - 2017 - Classification of Android apps and malware using d.pdf:application/pdf}
}

% NOTE(review): no journal was recorded; retyped as a conference paper with the booktitle believed to be WCRE 1995 - confirm, and replace pagetotal with the real page range. The attached PDF filename keeps its original misspelling on purpose.
@inproceedings{jarzabek_design_1995,
	title = {Design of a {Generic} {Reverse} {Engineering} {Assistant} {Tool}},
	abstract = {Reverse engineering is a knowledge-intensive process. We believe the involvement of a domain expert is critical in any but a trivial reverse engineering task. Our approach to reverse engineering assumes close cooperation between a domain expert and a knowledge-based reverse engineering assistant tool. Reverse engineering progresses in steps. At each step, a tool applies heuristic rules to extract design views, while a domain expert accepts/rejects decisions made by a tool and provides additional input to tune in the reverse engineering process. In our projects, we reverse engineer to enhance program understanding and to facilitate software re-engineering. We apply reverse engineering to variety of sources, produce many types of design views, use many design view presentation methods and, finally, deal with a rich, evolving set of reverse engineering heuristics. Therefore, we designed a flexible reverse engineering tool that can be easily customized to a reverse engineering task in hand. In particular, our tool (1) can be customized to work with different source languages, (2) contains an end-user facility to specify reverse engineering heuristics, to inspect design views extracted from source programs and to specify filters to tune in the reverse engineering process and (3) can export design views to other tools for further processing.},
	language = {en},
	booktitle = {Proceedings of 2nd {Working} {Conference} on {Reverse} {Engineering}},
	author = {Jarzabek, Stan and Keam, Tan Poh},
	month = jul,
	year = {1995},
	keywords = {reverse},
	pagetotal = {10},
	file = {Jarzabek and Keam - Design of a Geneiric Reverse Engineering Assistant.pdf:/home/fmind/Documents/Zotero/storage/EVPWT28B/Jarzabek and Keam - Design of a Geneiric Reverse Engineering Assistant.pdf:application/pdf}
}

@inproceedings{leita_exploiting_2010,
	title = {Exploiting diverse observation perspectives to get insights on the malware landscape},
	isbn = {978-1-4244-7500-1},
	url = {http://ieeexplore.ieee.org/document/5544291/},
	doi = {10.1109/DSN.2010.5544291},
	abstract = {We are witnessing an increasing complexity in the malware analysis scenario. The usage of polymorphic techniques generates a new challenge: it is often difficult to discern the instance of a known polymorphic malware from that of a newly encountered malware family, and to evaluate the impact of patching and code sharing among malware writers in order to prioritize analysis efforts.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2010 {IEEE}/{IFIP} {International} {Conference} on {Dependable} {Systems} \& {Networks} ({DSN})},
	publisher = {IEEE},
	author = {Leita, Corrado and Bayer, Ulrich and Kirda, Engin},
	month = jun,
	year = {2010},
	keywords = {study, classification},
	pages = {393--402},
	file = {Leita et al. - 2010 - Exploiting diverse observation perspectives to get.pdf:/home/fmind/Documents/Zotero/storage/KCYNKAVB/Leita et al. - 2010 - Exploiting diverse observation perspectives to get.pdf:application/pdf}
}

% NOTE(review): the imported editor list began with the LNCS series editorial board; only the last three names (believed to be the volume editors) are kept - confirm. The citation key is left unchanged so existing cites keep working.
@incollection{hutchison_exploring_2013,
	address = {Berlin, Heidelberg},
	title = {Exploring {Discriminatory} {Features} for {Automated} {Malware} {Classification}},
	volume = {7967},
	isbn = {978-3-642-39234-4 978-3-642-39235-1},
	url = {http://link.springer.com/10.1007/978-3-642-39235-1_3},
	abstract = {The ever-growing malware threat in the cyber space calls for techniques that are more effective than widely deployed signature-based detection systems and more scalable than manual reverse engineering by forensic experts. To counter large volumes of malware variants, machine learning techniques have been applied recently for automated malware classification. Despite the successes made from these efforts, we still lack a basic understanding of some key issues, such as what features we should use and which classifiers perform well on malware data. Against this backdrop, the goal of this work is to explore discriminatory features for automated malware classification. We conduct a systematic study on the discriminative power of various types of features extracted from malware programs, and experiment with different combinations of feature selection algorithms and classifiers. Our results not only offer insights into what features most distinguish malware families, but also shed light on how to develop scalable techniques for automated malware classification in practice.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Detection of {Intrusions} and {Malware}, and {Vulnerability} {Assessment}},
	series = {Lecture {Notes} in {Computer} {Science}},
	publisher = {Springer Berlin Heidelberg},
	author = {Yan, Guanhua and Brown, Nathan and Kong, Deguang},
	editor = {Rieck, Konrad and Stewin, Patrick and Seifert, Jean-Pierre},
	year = {2013},
	doi = {10.1007/978-3-642-39235-1_3},
	keywords = {classification, vt-features},
	pages = {41--61},
	file = {Yan et al. - 2013 - Exploring Discriminatory Features for Automated Ma.pdf:/home/fmind/Documents/Zotero/storage/2WRJFKJF/Yan et al. - 2013 - Exploring Discriminatory Features for Automated Ma.pdf:application/pdf}
}

% NOTE(review): "Hispasec Sistemas" and "Sophia Antipolis" were affiliations imported as authors and are removed. No journal was recorded, so this is typed as misc; the month/year may come from PDF metadata rather than the publication date - verify venue and year.
@misc{canto_large_2017,
	title = {Large scale malware collection: lessons learned},
	abstract = {In order to assure accuracy and realism of resilience assessment methods and tools, it is essential to have access to field data that are unbiased and representative. Several initiatives are taking place that offer access to malware samples for research purposes. Papers are published where techniques have been assessed thanks to these samples. Definition of benchmarking datasets is the next step ahead. In this paper, we report on the lessons learned while collecting and analysing malware samples in a large scale collaborative effort. Three different environments are described and their integration used to highlight the open issues that remain with such data collection. Three main lessons are offered to the reader. First, creation of representative malware samples datasets is probably an impossible task. Second, false negative alerts are not what we think they are. Third, false positive alerts exist where we were not used to see them. These three lessons have to be taken into account by those who want to assess the resilience of techniques with respect to malicious faults.},
	language = {en},
	author = {Canto, Julio and Dacier, Marc and Kirda, Engin and Leita, Corrado},
	month = dec,
	year = {2017},
	keywords = {vt-analysis, design, vt-labels, dataset},
	pagetotal = {6},
	file = {Canto et al. - Large scale malware collection lessons learned.pdf:/home/fmind/Documents/Zotero/storage/BITHS2QP/Canto et al. - Large scale malware collection lessons learned.pdf:application/pdf}
}

% NOTE(review): booktitle inferred from the ACM DOI/ISBN (CCS 2009) - confirm. The pages field still holds only the first page.
@inproceedings{hu_large-scale_2009,
	title = {Large-scale malware indexing using function-call graphs},
	isbn = {978-1-60558-894-0},
	url = {http://portal.acm.org/citation.cfm?doid=1653662.1653736},
	doi = {10.1145/1653662.1653736},
	abstract = {A major challenge of the anti-virus (AV) industry is how to effectively process the huge influx of malware samples they receive every day. One possible solution to this problem is to quickly determine if a new malware sample is similar to any previously-seen malware program. In this paper, we design, implement and evaluate a malware database management system called SMIT (Symantec Malware Indexing Tree) that can efficiently make such determination based on malware’s function-call graphs, which is a structural representation known to be less susceptible to instruction-level obfuscations commonly employed by malware writers to evade detection of AV software. Because each malware program is represented as a graph, the problem of searching for the most similar malware program in a database to a given malware sample is cast into a nearest-neighbor search problem in a graph database. To speed up this search, we have developed an efficient method to compute graph similarity that exploits structural and instruction-level information in the underlying malware programs, and a multi-resolution indexing scheme that uses a computationally economical feature vector for early pruning and resorts to a more accurate but computationally more expensive graph similarity function only when it needs to pinpoint the most similar neighbors. Results of a comprehensive performance study of the SMIT prototype using a database of more than 100,000 malware demonstrate the effective pruning power and scalability of its nearest neighbor search mechanisms.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 16th {ACM} {Conference} on {Computer} and {Communications} {Security}},
	publisher = {ACM Press},
	author = {Hu, Xin and Chiueh, Tzi-cker and Shin, Kang G.},
	year = {2009},
	keywords = {indexing, characterization, vt-features},
	pages = {611},
	file = {Hu et al. - 2009 - Large-scale malware indexing using function-call g.pdf:/home/fmind/Documents/Zotero/storage/KR2DH9B9/Hu et al. - 2009 - Large-scale malware indexing using function-call g.pdf:application/pdf}
}

@article{garcia_lightweight_2018,
	title = {Lightweight, {Obfuscation}-{Resilient} {Detection} and {Family} {Identification} of {Android} {Malware}},
	volume = {26},
	issn = {1049-331X},
	url = {http://dl.acm.org/citation.cfm?doid=3177743.3162625},
	doi = {10.1145/3162625},
	language = {en},
	number = {3},
	urldate = {2018-04-10},
	journal = {ACM Transactions on Software Engineering and Methodology},
	author = {Garcia, Joshua and Hammad, Mahmoud and Malek, Sam},
	month = jan,
	year = {2018},
	keywords = {classification, obfuscation},
	pages = {1--29},
	file = {Garcia et al. - 2018 - Lightweight, Obfuscation-Resilient Detection and F.pdf:/home/fmind/Documents/Zotero/storage/WECZDRHL/Garcia et al. - 2018 - Lightweight, Obfuscation-Resilient Detection and F.pdf:application/pdf}
}

@inproceedings{moser_limits_2007,
	title = {Limits of {Static} {Analysis} for {Malware} {Detection}},
	isbn = {978-0-7695-3060-4},
	url = {http://ieeexplore.ieee.org/document/4413008/},
	doi = {10.1109/ACSAC.2007.21},
	abstract = {Malicious code is an increasingly important problem that threatens the security of computer systems. The traditional line of defense against malware is composed of malware detectors such as virus and spyware scanners. Unfortunately, both researchers and malware authors have demonstrated that these scanners, which use pattern matching to identify malware, can be easily evaded by simple code transformations. To address this shortcoming, more powerful malware detectors have been proposed. These tools rely on semantic signatures and employ static analysis techniques such as model checking and theorem proving to perform detection. While it has been shown that these systems are highly effective in identifying current malware, it is less clear how successful they would be against adversaries that take into account the novel detection mechanisms.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Twenty-{Third} {Annual} {Computer} {Security} {Applications} {Conference} ({ACSAC} 2007)},
	publisher = {IEEE},
	author = {Moser, Andreas and Kruegel, Christopher and Kirda, Engin},
	month = dec,
	year = {2007},
	keywords = {detection, static, obfuscation},
	pages = {421--430},
	file = {Moser et al. - 2007 - Limits of Static Analysis for Malware Detection.pdf:/home/fmind/Documents/Zotero/storage/6FFKX72F/Moser et al. - 2007 - Limits of Static Analysis for Malware Detection.pdf:application/pdf}
}

% NOTE(review): booktitle inferred from the ACM DOI/ISBN (ACSAC 2012) - confirm. "Milani Comparetti" is treated as a compound surname. The pages field still holds only the first page.
@inproceedings{lindorfer_lines_2012,
	title = {Lines of malicious code: insights into the malicious software industry},
	isbn = {978-1-4503-1312-4},
	shorttitle = {Lines of malicious code},
	url = {http://dl.acm.org/citation.cfm?doid=2420950.2421001},
	doi = {10.1145/2420950.2421001},
	abstract = {Malicious software installed on infected computers is a fundamental component of online crime. Malware development thus plays an essential role in the underground economy of cyber-crime. Malware authors regularly update their software to defeat defenses or to support new or improved criminal business models. A large body of research has focused on detecting malware, defending against it and identifying its functionality. In addition to these goals, however, the analysis of malware can provide a glimpse into the software development industry that develops malicious code. In this work, we present techniques to observe the evolution of a malware family over time. First, we develop techniques to compare versions of malicious code and quantify their differences. Furthermore, we use behavior observed from dynamic analysis to assign semantics to binary code and to identify functional components within a malware binary. By combining these techniques, we are able to monitor the evolution of a malware’s functional components. We implement these techniques in a system we call BEAGLE, and apply it to the observation of 16 malware strains over several months. The results of these experiments provide insight into the effort involved in updating malware code, and show that BEAGLE can identify changes to individual malware components.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 28th {Annual} {Computer} {Security} {Applications} {Conference}},
	publisher = {ACM Press},
	author = {Lindorfer, Martina and Di Federico, Alessandro and Maggi, Federico and Milani Comparetti, Paolo and Zanero, Stefano},
	year = {2012},
	keywords = {dynamic, indexing, vt-features, evolution},
	pages = {349},
	file = {Lindorfer et al. - 2012 - Lines of malicious code insights into the malicio.pdf:/home/fmind/Documents/Zotero/storage/KV82QTRT/Lindorfer et al. - 2012 - Lines of malicious code insights into the malicio.pdf:application/pdf}
}

@article{mezzour_longitudinal_2016,
	author = {Mezzour, Ghita and Carley, L. Richard and Carley, Kathleen M.},
	title = {Longitudinal analysis of a large corpus of cyber threat descriptions},
	journal = {Journal of Computer Virology and Hacking Techniques},
	volume = {12},
	number = {1},
	pages = {11--22},
	month = feb,
	year = {2016},
	issn = {2263-8733},
	doi = {10.1007/s11416-014-0217-8},
	url = {http://link.springer.com/10.1007/s11416-014-0217-8},
	urldate = {2018-04-10},
	language = {en},
	keywords = {study, evolution},
	file = {Mezzour et al. - 2016 - Longitudinal analysis of a large corpus of cyber t.pdf:/home/fmind/Documents/Zotero/storage/MXXI9QPW/Mezzour et al. - 2016 - Longitudinal analysis of a large corpus of cyber t.pdf:application/pdf}
}

% NOTE(review): no journal was recorded, so this is typed as misc and the page count moved to pagetotal - replace with the real venue once known.
@misc{thomson_malware_2015,
	title = {Malware {Identification} {Using} {Cognitively}-{Inspired} {Inference}},
	abstract = {Malware reverse-engineering is an important type of analysis in cybersecurity. Rapidly identifying the tasks that a piece of malware is designed to perform is an important part of reverse engineering that is generally manually performed as it relies heavily on human intuition. This paper describes how the use of cognitively-inspired inference can assist in automating some of malware task identification. Computational models derived from human-inspired inference were able to reach relatively higher asymptotic performance faster than traditional machine learning approaches such as decision trees and naïve Bayes classifiers. Using a real-world malware dataset, these cognitive models identified sets of tasks with an unbiased F1 measure of 0.94. Even when trained on historical datasets of malware samples from different families, the cognitive models still maintained the precision of decision tree and Bayes classifiers while providing a significant improvement to recall.},
	language = {en},
	author = {Thomson, Robert and Lebiere, Christian and Bennati, Stefano and Shakarian, Paulo and Nunes, Eric},
	year = {2015},
	keywords = {cognitive},
	pagetotal = {8},
	file = {Thomson et al. - Malware Identification Using Cognitively-Inspired .pdf:/home/fmind/Documents/Zotero/storage/UBNG2ZTX/Thomson et al. - Malware Identification Using Cognitively-Inspired .pdf:application/pdf}
}

% NOTE(review): booktitle inferred from the ACM DOI/ISBN (CCS 2012) - confirm. The pages field still holds only the first page.
@inproceedings{grier_manufacturing_2012,
	title = {Manufacturing compromise: the emergence of exploit-as-a-service},
	isbn = {978-1-4503-1651-4},
	shorttitle = {Manufacturing compromise},
	url = {http://dl.acm.org/citation.cfm?doid=2382196.2382283},
	doi = {10.1145/2382196.2382283},
	abstract = {We investigate the emergence of the exploit-as-a-service model for drive-by browser compromise. In this regime, attackers pay for an exploit kit or service to do the “dirty work” of exploiting a victim’s browser, decoupling the complexities of browser and plugin vulnerabilities from the challenges of generating traffic to a website under the attacker’s control. Upon a successful exploit, these kits load and execute a binary provided by the attacker, effectively transferring control of a victim’s machine to the attacker.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2012 {ACM} {Conference} on {Computer} and {Communications} {Security}},
	publisher = {ACM Press},
	author = {Grier, Chris and Pitsillidis, Andreas and Provos, Niels and Rafique, M. Zubair and Rajab, Moheeb Abu and Rossow, Christian and Thomas, Kurt and Paxson, Vern and Savage, Stefan and Voelker, Geoffrey M. and Ballard, Lucas and Caballero, Juan and Chachra, Neha and Dietrich, Christian J. and Levchenko, Kirill and Mavrommatis, Panayiotis and McCoy, Damon and Nappa, Antonio},
	year = {2012},
	keywords = {study},
	pages = {821},
	file = {Grier et al. - 2012 - Manufacturing compromise the emergence of exploit.pdf:/home/fmind/Documents/Zotero/storage/LE2ZB59K/Grier et al. - 2012 - Manufacturing compromise the emergence of exploit.pdf:application/pdf}
}

% NOTE(review): no journal was recorded (this looks like a competition write-up), so it is typed as misc and the page count moved to pagetotal - confirm.
@misc{wang_microsoft_2015,
	title = {Microsoft {Malware} {Classification} {Challenge} ({BIG} 2015): {First} {Place} {Team}: {Say} {No} to {Overfitting}},
	abstract = {This document describes our approach to the Microsoft Malware Classification Challenge (BIG 2015). Our approach is based on intensive Feature Engineering, Gradient Boosting (Xgboost), Ensembling, Semi-supervised learning and calibration. We achieve the 1st place with 0.0028 multi-class logarithmic loss in the private leaderboard.},
	language = {en},
	author = {Wang, Xiaozhou and Liu, Jiwei and Chen, Xueer},
	month = may,
	year = {2015},
	keywords = {classification},
	pagetotal = {7},
	file = {Wang et al. - Microsoft Malware Classification Challenge (BIG 20.pdf:/home/fmind/Documents/Zotero/storage/SRSQ9RFX/Wang et al. - Microsoft Malware Classification Challenge (BIG 20.pdf:application/pdf}
}

% NOTE(review): no journal was recorded; retyped as a conference paper. Booktitle and page range are believed to be USENIX Security 2015, pp. 1057-1072 (16 pages, matching the original page count) - confirm.
@inproceedings{graziano_needles_2015,
	title = {Needles in a {Haystack}: {Mining} {Information} from {Public} {Dynamic} {Analysis} {Sandboxes} for {Malware} {Intelligence}},
	abstract = {Malware sandboxes are automated dynamic analysis systems that execute programs in a controlled environment. Within the large volumes of samples submitted every day to these services, some submissions appear to be different from others, and show interesting characteristics. For example, we observed that malware samples involved in famous targeted attacks – like the Regin APT framework or the recently disclosed malwares from the Equation Group – were submitted to our sandbox months or even years before they were detected in the wild. In other cases, the malware developers themselves interact with public sandboxes to test their creations or to develop a new evasion technique. We refer to similar cases as malware developments.},
	language = {en},
	booktitle = {24th {USENIX} {Security} {Symposium} ({USENIX} {Security} 15)},
	author = {Graziano, Mariano and Canali, Davide and Lanzi, Andrea and Bilge, Leyla and Balzarotti, Davide},
	year = {2015},
	keywords = {dynamic},
	pages = {1057--1072},
	file = {Graziano et al. - Needles in a Haystack Mining Information from Pub.pdf:/home/fmind/Documents/Zotero/storage/L5Z5YHPP/Graziano et al. - Needles in a Haystack Mining Information from Pub.pdf:application/pdf}
}

% NOTE(review): no journal was recorded; retyped as a chapter in a Springer proceedings volume. Venue details are believed to be RAID 2010, LNCS 6307, pp. 238-255 (18 pages, matching the original page count) - confirm and add editors.
@incollection{li_challenges_2010,
	address = {Berlin, Heidelberg},
	title = {On {Challenges} in {Evaluating} {Malware} {Clustering}},
	volume = {6307},
	abstract = {Malware clustering and classification are important tools that enable analysts to prioritize their malware analysis efforts. The recent emergence of fully automated methods for malware clustering and classification that report high accuracy suggests that this problem may largely be solved. In this paper, we report the results of our attempt to confirm our conjecture that the method of selecting ground-truth data in prior evaluations biases their results toward high accuracy. To examine this conjecture, we apply clustering algorithms from a different domain (plagiarism detection), first to the dataset used in a prior work’s evaluation and then to a wholly new malware dataset, to see if clustering algorithms developed without attention to subtleties of malware obfuscation are nevertheless successful. While these studies provide conflicting signals as to the correctness of our conjecture, our investigation of possible reasons uncovers, we believe, a cautionary note regarding the significance of highly accurate clustering results, as can be impacted by testing on a dataset with a biased cluster-size distribution.},
	language = {en},
	booktitle = {Recent {Advances} in {Intrusion} {Detection}},
	series = {Lecture {Notes} in {Computer} {Science}},
	publisher = {Springer Berlin Heidelberg},
	author = {Li, Peng and Liu, Limin and Gao, Debin and Reiter, Michael K.},
	year = {2010},
	keywords = {classification},
	pages = {238--255},
	file = {Li et al. - On Challenges in Evaluating Malware Clustering.pdf:/home/fmind/Documents/Zotero/storage/I2PA6W4P/Li et al. - On Challenges in Evaluating Malware Clustering.pdf:application/pdf}
}

@inproceedings{sommer_outside_2010,
	title = {Outside the {Closed} {World}: {On} {Using} {Machine} {Learning} for {Network} {Intrusion} {Detection}},
	isbn = {978-1-4244-6894-2},
	shorttitle = {Outside the {Closed} {World}},
	url = {http://ieeexplore.ieee.org/document/5504793/},
	doi = {10.1109/SP.2010.25},
	abstract = {In network intrusion detection research, one popular strategy for finding attacks is monitoring a network’s activity for anomalies: deviations from profiles of normality previously learned from benign traffic, typically identified using tools borrowed from the machine learning community. However, despite extensive academic research one finds a striking gap in terms of actual deployments of such systems: compared with other intrusion detection approaches, machine learning is rarely employed in operational “real world” settings. We examine the differences between the network intrusion detection problem and other areas where machine learning regularly finds much more success. Our main claim is that the task of finding attacks is fundamentally different from these other applications, making it significantly harder for the intrusion detection community to employ machine learning effectively. We support this claim by identifying challenges particular to network intrusion detection, and provide a set of guidelines meant to strengthen future research on anomaly detection.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2010 {IEEE} {Symposium} on {Security} and {Privacy}},
	publisher = {IEEE},
	author = {Sommer, Robin and Paxson, Vern},
	year = {2010},
	keywords = {vt-analysis},
	pages = {305--316},
	file = {Sommer and Paxson - 2010 - Outside the Closed World On Using Machine Learnin.pdf:/home/fmind/Documents/Zotero/storage/5LDGTR7V/Sommer and Paxson - 2010 - Outside the Closed World On Using Machine Learnin.pdf:application/pdf}
}

@inproceedings{perdisci_vamo:_2012,
	title = {{VAMO}: towards a fully automated malware clustering validity analysis},
	isbn = {978-1-4503-1312-4},
	shorttitle = {{VAMO}},
	url = {http://dl.acm.org/citation.cfm?doid=2420950.2420999},
	doi = {10.1145/2420950.2420999},
	abstract = {Malware clustering is commonly applied by malware analysts to cope with the increasingly growing number of distinct malware variants collected every day from the Internet. While malware clustering systems can be useful for a variety of applications, assessing the quality of their results is intrinsically hard. In fact, clustering can be viewed as an unsupervised learning process over a dataset for which the complete ground truth is usually not available. Previous studies propose to evaluate malware clustering results by leveraging the labels assigned to the malware samples by multiple anti-virus scanners (AVs). However, the methods proposed thus far require a (semi-)manual adjustment and mapping between labels generated by different AVs, and are limited to selecting a reference sub-set of samples for which an agreement regarding their labels can be reached across a majority of AVs. This approach may bias the reference set towards “easy to cluster” malware samples, thus potentially resulting in an overoptimistic estimate of the accuracy of the malware clustering results.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 28th {Annual} {Computer} {Security} {Applications} {Conference} ({ACSAC})},
	publisher = {ACM Press},
	author = {Perdisci, Roberto and U, ManChon},
	year = {2012},
	keywords = {classification, inconsistency, vt-labels},
	pages = {329--338},
	file = {Perdisci and U - 2012 - VAMO towards a fully automated malware clustering.pdf:/home/fmind/Documents/Zotero/storage/554LBYPB/Perdisci and U - 2012 - VAMO towards a fully automated malware clustering.pdf:application/pdf}
}

@inproceedings{rossow_prudent_2012,
	title = {Prudent {Practices} for {Designing} {Malware} {Experiments}: {Status} {Quo} and {Outlook}},
	isbn = {978-1-4673-1244-8 978-0-7695-4681-0},
	shorttitle = {Prudent {Practices} for {Designing} {Malware} {Experiments}},
	url = {http://ieeexplore.ieee.org/document/6234405/},
	doi = {10.1109/SP.2012.14},
	abstract = {Malware researchers rely on the observation of malicious code in execution to collect datasets for a wide array of experiments, including generation of detection models, study of longitudinal behavior, and validation of prior research. For such research to reflect prudent science, the work needs to address a number of concerns relating to the correct and representative use of the datasets, presentation of methodology in a fashion sufficiently transparent to enable reproducibility, and due consideration of the need not to harm others.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2012 {IEEE} {Symposium} on {Security} and {Privacy}},
	publisher = {IEEE},
	author = {Rossow, Christian and Dietrich, Christian J. and Grier, Chris and Kreibich, Christian and Paxson, Vern and Pohlmann, Norbert and Bos, Herbert and van Steen, Maarten},
	month = may,
	year = {2012},
	keywords = {vt-analysis, design},
	pages = {65--79},
	file = {Rossow et al. - 2012 - Prudent Practices for Designing Malware Experiment.pdf:/home/fmind/Documents/Zotero/storage/PBRXTSM6/Rossow et al. - 2012 - Prudent Practices for Designing Malware Experiment.pdf:application/pdf}
}

@article{willems_reverse_2012,
	title = {Reverse {Code} {Engineering} — {State} of the {Art} and {Countermeasures}},
	volume = {54},
	issn = {1611-2776},
	url = {http://www.degruyter.com/doi/10.1524/itit.2012.0664},
	doi = {10.1524/itit.2012.0664},
	abstract = {Reverse Code Engineering (RCE) is, loosely speaking, the process of analyzing a piece of code in order to understand it. RCE is often used to analyze proprietary, binary programs, and in the last few years this research area has evolved a lot. In this article, we survey and structure the area of reverse code engineering. We focus on different techniques to recover both the control and data flow of a given binary program, for which no source code is available. Furthermore, we also discuss analysis techniques for malicious software (short: malware), which is commonly protected to resist analysis. We present the current state of the art of such protection techniques, while dividing them into active and passive measures. Our survey focusses on reverse engineering of binary native code for the Intel/AMD x86 architecture, and we thus disregard analysis of byte-code like Java or .NET. Nevertheless, most of the techniques presented in this article can be transferred to other architectures and operating system as well.},
	language = {en},
	number = {2},
	urldate = {2018-04-10},
	journal = {it - Information Technology},
	author = {Willems, Carsten and Freiling, Felix C.},
	month = apr,
	year = {2012},
	keywords = {reverse},
	pages = {53--63},
	file = {Willems and Freiling - 2012 - Reverse Code Engineering — State of the Art and Co.pdf:/home/fmind/Documents/Zotero/storage/2ID8DEFP/Willems and Freiling - 2012 - Reverse Code Engineering — State of the Art and Co.pdf:application/pdf}
}

@incollection{caballero_reviewer_2016,
	address = {Cham},
	title = {Reviewer {Integration} and {Performance} {Measurement} for {Malware} {Detection}},
	volume = {9721},
	series = {Lecture {Notes} in {Computer} {Science}},
	isbn = {978-3-319-40666-4 978-3-319-40667-1},
	url = {http://link.springer.com/10.1007/978-3-319-40667-1_7},
	abstract = {We present and evaluate a large-scale malware detection system integrating machine learning with expert reviewers, treating reviewers as a limited labeling resource. We demonstrate that even in small numbers, reviewers can vastly improve the system’s ability to keep pace with evolving threats. We conduct our evaluation on a sample of VirusTotal submissions spanning 2.5 years and containing 1.1 million binaries with 778GB of raw feature data. Without reviewer assistance, we achieve 72\% detection at a 0.5\% false positive rate, performing comparable to the best vendors on VirusTotal. Given a budget of 80 accurate reviews daily, we improve detection to 89\% and are able to detect 42\% of malicious binaries undetected upon initial submission to VirusTotal. Additionally, we identify a previously unnoticed temporal inconsistency in the labeling of training datasets. We compare the impact of training labels obtained at the same time training data is first seen with training labels obtained months later. We find that using training labels obtained well after samples appear, and thus unavailable in practice for current training data, inflates measured detection by almost 20 percentage points. We release our cluster-based implementation, as well as a list of all hashes in our evaluation and 3\% of our entire dataset.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Detection of {Intrusions} and {Malware}, and {Vulnerability} {Assessment}},
	publisher = {Springer International Publishing},
	author = {Miller, Brad and Kantchelian, Alex and Tschantz, Michael Carl and Afroz, Sadia and Bachwani, Rekha and Faizullabhoy, Riyaz and Huang, Ling and Shankar, Vaishaal and Wu, Tony and Yiu, George and Joseph, Anthony D. and Tygar, J. D.},
	editor = {Caballero, Juan and Zurutuza, Urko and Rodríguez, Ricardo J.},
	year = {2016},
	doi = {10.1007/978-3-319-40667-1_7},
	keywords = {detection, classification, hitl},
	pages = {122--141},
	file = {Miller et al. - 2016 - Reviewer Integration and Performance Measurement f.pdf:/home/fmind/Documents/Zotero/storage/C3PFUYBM/Miller et al. - 2016 - Reviewer Integration and Performance Measurement f.pdf:application/pdf}
}

@inproceedings{bayer_scalable_2009,
	title = {Scalable, {Behavior}-{Based} {Malware} {Clustering}},
	abstract = {Anti-malware companies receive thousands of malware samples every day. To process this large quantity, a number of automated analysis tools were developed. These tools execute a malicious program in a controlled environment and produce reports that summarize the program’s actions. Of course, the problem of analyzing the reports still remains. Recently, researchers have started to explore automated clustering techniques that help to identify samples that exhibit similar behavior. This allows an analyst to discard reports of samples that have been seen before, while focusing on novel, interesting threats. Unfortunately, previous techniques do not scale well and frequently fail to generalize the observed activity well enough to recognize related malware.},
	language = {en},
	booktitle = {Proceedings of the 16th {Annual} {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	author = {Bayer, Ulrich and Comparetti, Paolo Milani and Hlauschek, Clemens and Kruegel, Christopher and Kirda, Engin},
	year = {2009},
	keywords = {classification, dynamic},
	pagetotal = {18},
	file = {Bayer et al. - Scalable, Behavior-Based Malware Clustering.pdf:/home/fmind/Documents/Zotero/storage/SWFRXFYK/Bayer et al. - Scalable, Behavior-Based Malware Clustering.pdf:application/pdf}
}

@article{gu_vulhunter:_2015,
	title = {{VulHunter}: {Toward} {Discovering} {Vulnerabilities} in {Android} {Applications}},
	volume = {35},
	issn = {0272-1732},
	doi = {10.1109/MM.2015.25},
	language = {en},
	number = {1},
	journal = {{IEEE} {Micro}},
	author = {Qian, Chenxiong and Luo, Xiapu and Le, Yu and Gu, Guofei},
	year = {2015},
	keywords = {detection, static},
	pages = {44--53},
	file = {Gu - 2015 - Chenxiong Qian Xiapu Luo Yu Le.pdf:/home/fmind/Documents/Zotero/storage/FKCCLBKW/Gu - 2015 - Chenxiong Qian Xiapu Luo Yu Le.pdf:application/pdf}
}

@inproceedings{peng_using_2012,
	title = {Using probabilistic generative models for ranking risks of {Android} apps},
	isbn = {978-1-4503-1651-4},
	url = {http://dl.acm.org/citation.cfm?doid=2382196.2382224},
	doi = {10.1145/2382196.2382224},
	abstract = {One of Android’s main defense mechanisms against malicious apps is a risk communication mechanism which, before a user installs an app, warns the user about the permissions the app requires, trusting that the user will make the right decision. This approach has been shown to be ineffective as it presents the risk information of each app in a “stand-alone” fashion and in a way that requires too much technical knowledge and time to distill useful information.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2012 {ACM} {Conference} on {Computer} and {Communications} {Security} ({CCS})},
	publisher = {ACM Press},
	author = {Peng, Hao and Gates, Chris and Sarma, Bhaskar and Li, Ninghui and Qi, Yuan and Potharaju, Rahul and Nita-Rotaru, Cristina and Molloy, Ian},
	year = {2012},
	keywords = {detection},
	pages = {241--252},
	file = {Peng et al. - 2012 - Using probabilistic generative models for ranking .pdf:/home/fmind/Documents/Zotero/storage/FURP94U4/Peng et al. - 2012 - Using probabilistic generative models for ranking .pdf:application/pdf}
}

@inproceedings{fratantonio_triggerscope:_2016,
	title = {{TriggerScope}: {Towards} {Detecting} {Logic} {Bombs} in {Android} {Applications}},
	isbn = {978-1-5090-0824-7},
	shorttitle = {{TriggerScope}},
	url = {http://ieeexplore.ieee.org/document/7546513/},
	doi = {10.1109/SP.2016.30},
	abstract = {Android is the most popular mobile platform today, and it is also the mobile operating system that is most heavily targeted by malware. Existing static analyses are effective in detecting the presence of most malicious code and unwanted information flows. However, certain types of malice are very difficult to capture explicitly by modeling permission sets, suspicious API calls, or unwanted information flows.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2016 {IEEE} {Symposium} on {Security} and {Privacy}},
	publisher = {IEEE},
	author = {Fratantonio, Yanick and Bianchi, Antonio and Robertson, William and Kirda, Engin and Kruegel, Christopher and Vigna, Giovanni},
	month = may,
	year = {2016},
	keywords = {static, characterization},
	pages = {377--396},
	file = {Fratantonio et al. - 2016 - TriggerScope Towards Detecting Logic Bombs in And.pdf:/home/fmind/Documents/Zotero/storage/UHTR7Q6A/Fratantonio et al. - 2016 - TriggerScope Towards Detecting Logic Bombs in And.pdf:application/pdf}
}

@inproceedings{zhang_towards_2015,
	title = {Towards {Automatic} {Generation} of {Security}-{Centric} {Descriptions} for {Android} {Apps}},
	isbn = {978-1-4503-3832-5},
	url = {http://dl.acm.org/citation.cfm?doid=2810103.2813669},
	doi = {10.1145/2810103.2813669},
	abstract = {To improve the security awareness of end users, Android markets directly present two classes of literal app information: 1) permission requests and 2) textual descriptions. Unfortunately, neither can serve the needs. A permission list is not only hard to understand but also inadequate; textual descriptions provided by developers are not security-centric and are significantly deviated from the permissions. To fill in this gap, we propose a novel technique to automatically generate security-centric app descriptions, based on program analysis. We implement a prototype system, DESCRIBEME, and evaluate our system using both DroidBench and real-world Android apps. Experimental results demonstrate that DESCRIBEME enables a promising technique which bridges the gap between descriptions and permissions. A further user study shows that automatically produced descriptions are not only readable but also effectively help users avoid malware and privacy-breaching apps.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 22nd {ACM} {SIGSAC} {Conference} on {Computer} and {Communications} {Security} ({CCS})},
	publisher = {ACM Press},
	author = {Zhang, Mu and Duan, Yue and Feng, Qian and Yin, Heng},
	year = {2015},
	keywords = {static, characterization},
	pages = {518--529},
	file = {Zhang et al. - 2015 - Towards Automatic Generation of Security-Centric D.pdf:/home/fmind/Documents/Zotero/storage/AIMF2BS7/Zhang et al. - 2015 - Towards Automatic Generation of Security-Centric D.pdf:application/pdf}
}

@article{tam_evolution_2017,
	title = {The {Evolution} of {Android} {Malware} and {Android} {Analysis} {Techniques}},
	volume = {49},
	issn = {0360-0300},
	url = {http://dl.acm.org/citation.cfm?doid=3022634.3017427},
	doi = {10.1145/3017427},
	language = {en},
	number = {4},
	urldate = {2018-04-10},
	journal = {ACM Computing Surveys},
	author = {Tam, Kimberly and Feizollah, Ali and Anuar, Nor Badrul and Salleh, Rosli and Cavallaro, Lorenzo},
	month = jan,
	year = {2017},
	keywords = {survey, characterization},
	pages = {1--41},
	file = {Tam et al. - 2017 - The Evolution of Android Malware and Android Analy.pdf:/home/fmind/Documents/Zotero/storage/RG48J69P/Tam et al. - 2017 - The Evolution of Android Malware and Android Analy.pdf:application/pdf}
}

@inproceedings{enck_taintdroid:_2014,
	title = {{TaintDroid}: {An} {Information}-{Flow} {Tracking} {System} for {Realtime} {Privacy} {Monitoring} on {Smartphones}},
	abstract = {Today’s smartphone operating systems frequently fail to provide users with adequate control over and visibility into how third-party applications use their private data. We address these shortcomings with TaintDroid, an efficient, system-wide dynamic taint tracking and analysis system capable of simultaneously tracking multiple sources of sensitive data. TaintDroid provides realtime analysis by leveraging Android’s virtualized execution environment. TaintDroid incurs only 14\% performance overhead on a CPU-bound micro-benchmark and imposes negligible overhead on interactive third-party applications. Using TaintDroid to monitor the behavior of 30 popular third-party Android applications, we found 68 instances of potential misuse of users’ private information across 20 applications. Monitoring sensitive data with TaintDroid provides informed use of third-party applications for phone users and valuable input for smartphone security service firms seeking to identify misbehaving applications.},
	language = {en},
	booktitle = {Proceedings of the 9th {USENIX} {Symposium} on {Operating} {Systems} {Design} and {Implementation} ({OSDI})},
	publisher = {USENIX Association},
	author = {Enck, William and Gilbert, Peter and Chun, Byung-Gon and Cox, Landon P. and Jung, Jaeyeon and McDaniel, Patrick and Sheth, Anmol N.},
	year = {2010},
	keywords = {dynamic, leaks},
	pages = {393--407},
	file = {Enck et al. - TaintDroid An Information-Flow Tracking System fo.pdf:/home/fmind/Documents/Zotero/storage/63KQCULT/Enck et al. - TaintDroid An Information-Flow Tracking System fo.pdf:application/pdf}
}

@inproceedings{gascon_structural_2013,
	title = {Structural detection of {Android} malware using embedded call graphs},
	isbn = {978-1-4503-2488-5},
	url = {http://dl.acm.org/citation.cfm?doid=2517312.2517315},
	doi = {10.1145/2517312.2517315},
	abstract = {The number of malicious applications targeting the Android system has literally exploded in recent years. While the security community, well aware of this fact, has proposed several methods for detection of Android malware, most of these are based on permission and API usage or the identification of expert features. Unfortunately, many of these approaches are susceptible to instruction level obfuscation techniques. Previous research on classic desktop malware has shown that some high level characteristics of the code, such as function call graphs, can be used to find similarities between samples while being more robust against certain obfuscation strategies. However, the identification of similarities in graphs is a non-trivial problem whose complexity hinders the use of these features for malware detection. In this paper, we explore how recent developments in machine learning classification of graphs can be efficiently applied to this problem. We propose a method for malware detection based on efficient embeddings of function call graphs with an explicit feature map inspired by a linear-time graph kernel. In an evaluation with 12,158 malware samples our method, purely based on structural features, outperforms several related approaches and detects 89\% of the malware with few false alarms, while also allowing to pin-point malicious code structures within Android applications.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2013 {ACM} {Workshop} on {Artificial} {Intelligence} and {Security} ({AISec})},
	publisher = {ACM Press},
	author = {Gascon, Hugo and Yamaguchi, Fabian and Arp, Daniel and Rieck, Konrad},
	year = {2013},
	keywords = {detection, static},
	pages = {45--54},
	file = {Gascon et al. - 2013 - Structural detection of android malware using embe.pdf:/home/fmind/Documents/Zotero/storage/QK8B7FS5/Gascon et al. - 2013 - Structural detection of android malware using embe.pdf:application/pdf}
}

@incollection{lin_stegomalware:_2015,
	address = {Cham},
	title = {Stegomalware: {Playing} {Hide} and {Seek} with {Malicious} {Components} in {Smartphone} {Apps}},
	volume = {8957},
	series = {Lecture {Notes} in {Computer} {Science}},
	isbn = {978-3-319-16744-2 978-3-319-16745-9},
	shorttitle = {Stegomalware},
	url = {http://link.springer.com/10.1007/978-3-319-16745-9_27},
	abstract = {We discuss a class of smartphone malware that uses steganographic techniques to hide malicious executable components within their assets, such as documents, databases, or multimedia files. In contrast with existing obfuscation techniques, many existing information hiding algorithms are demonstrably secure, which would make such stegomalware virtually undetectable by static analysis techniques. We introduce various types of stegomalware attending to the location of the hidden payload and the components required to extract it. We demonstrate its feasibility with a prototype implementation of a stegomalware app that has remained undetected in Google Play so far. We also address the question of whether steganographic capabilities are already being used for malicious purposes. To do this, we introduce a detection system for stegomalware and use it to analyze around 55K apps retrieved from both malware sources and alternative app markets. Our preliminary results are not conclusive, but reveal that many apps do incorporate steganographic code and that there is a substantial amount of hidden content embedded in app assets.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Information {Security} and {Cryptology}},
	publisher = {Springer International Publishing},
	author = {Suarez-Tangil, Guillermo and Tapiador, Juan E. and Peris-Lopez, Pedro},
	editor = {Lin, Dongdai and Yung, Moti and Zhou, Jianying},
	year = {2015},
	doi = {10.1007/978-3-319-16745-9_27},
	keywords = {obfuscation},
	pages = {496--515},
	file = {Suarez-Tangil et al. - 2015 - Stegomalware Playing Hide and Seek with Malicious.pdf:/home/fmind/Documents/Zotero/storage/L7AWZNM6/Suarez-Tangil et al. - 2015 - Stegomalware Playing Hide and Seek with Malicious.pdf:application/pdf}
}

@article{maiorca_stealth_2015,
	title = {Stealth attacks: {An} extended insight into the obfuscation effects on {Android} malware},
	volume = {51},
	issn = {0167-4048},
	shorttitle = {Stealth attacks},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S016740481500022X},
	doi = {10.1016/j.cose.2015.02.007},
	abstract = {In order to effectively evade anti-malware solutions, Android malware authors are progressively resorting to automatic obfuscation strategies. Recent works have shown, on small-scale experiments, the possibility of evading anti-malware engines by applying simple obfuscation transformations on previously detected malware samples. In this paper, we provide a large-scale experiment in which the detection performances of a high number of anti-malware solutions are tested against two different sets of malware samples that have been obfuscated according to different strategies. Moreover, we show that anti-malware engines search for possible malicious content inside assets and entry-point classes. We also provide a temporal analysis of the detection performances of anti-malware engines to verify if their resilience has improved since 2013. Finally, we show how, by manipulating the area of the Android executable that contains the strings used by the application, it is possible to deceive anti-malware engines so that they will identify legitimate samples as malware. On one hand, the attained results show that anti-malware systems have improved their resilience against trivial obfuscation techniques. On the other hand, more complex changes to the application executable have proved to be still effective against detection. Thus, we claim that a deeper static (or dynamic) analysis of the application is needed to improve the robustness of such systems.},
	language = {en},
	urldate = {2018-04-10},
	journal = {Computers \& Security},
	author = {Maiorca, Davide and Ariu, Davide and Corona, Igino and Aresu, Marco and Giacinto, Giorgio},
	month = jun,
	year = {2015},
	keywords = {obfuscation},
	pages = {16--31},
	file = {Maiorca et al. - 2015 - Stealth attacks An extended insight into the obfu.pdf:/home/fmind/Documents/Zotero/storage/3SIWM7LK/Maiorca et al. - 2015 - Stealth attacks An extended insight into the obfu.pdf:application/pdf}
}

@inproceedings{holland_security_2015,
	title = {Security {Toolbox} for {Detecting} {Novel} and {Sophisticated} {Android} {Malware}},
	isbn = {978-1-4799-1934-5},
	url = {http://ieeexplore.ieee.org/document/7203055/},
	doi = {10.1109/ICSE.2015.235},
	abstract = {This paper presents a demo of our Security Toolbox to detect novel malware in Android apps. This Toolbox is developed through our recent research project funded by the DARPA Automated Program Analysis for Cybersecurity (APAC) project. The adversarial challenge ("Red") teams in the DARPA APAC program are tasked with designing sophisticated malware to test the bounds of malware detection technology being developed by the research and development ("Blue") teams. Our research group, a Blue team in the DARPA APAC program, proposed a “human-in-the-loop program analysis” approach to detect malware given the source or Java bytecode for an Android app. Our malware detection apparatus consists of two components: a general-purpose program analysis platform called Atlas, and a Security Toolbox built on the Atlas platform. This paper describes the major design goals, the Toolbox components to achieve the goals, and the workflow for auditing Android apps. The accompanying video illustrates features of the Toolbox through a live audit.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 37th {IEEE}/{ACM} {International} {Conference} on {Software} {Engineering} ({ICSE})},
	publisher = {IEEE},
	author = {Holland, Benjamin and Deering, Tom and Kothari, Suresh and Mathews, Jon and Ranade, Nikhil},
	month = may,
	year = {2015},
	keywords = {detection, hitl},
	pages = {733--736},
	file = {Holland et al. - 2015 - Security Toolbox for Detecting Novel and Sophistic.pdf:/home/fmind/Documents/Zotero/storage/WA8C49VF/Holland et al. - 2015 - Security Toolbox for Detecting Novel and Sophistic.pdf:application/pdf}
}

@inproceedings{grace_riskranker:_2012,
	title = {{RiskRanker}: scalable and accurate zero-day {Android} malware detection},
	isbn = {978-1-4503-1301-8},
	shorttitle = {{RiskRanker}},
	url = {http://dl.acm.org/citation.cfm?doid=2307636.2307663},
	doi = {10.1145/2307636.2307663},
	abstract = {Smartphone sales have recently experienced explosive growth. Their popularity also encourages malware authors to penetrate various mobile marketplaces with malicious applications (or apps). These malicious apps hide in the sheer number of other normal apps, which makes their detection challenging. Existing mobile anti-virus software are inadequate in their reactive nature by relying on known malware samples for signature extraction. In this paper, we propose a proactive scheme to spot zero-day Android malware. Without relying on malware samples and their signatures, our scheme is motivated to assess potential security risks posed by these untrusted apps. Specifically, we have developed an automated system called RiskRanker to scalably analyze whether a particular app exhibits dangerous behavior (e.g., launching a root exploit or sending background SMS messages). The output is then used to produce a prioritized list of reduced apps that merit further investigation. When applied to examine 118,318 total apps collected from various Android markets over September and October 2011, our system takes less than four days to process all of them and effectively reports 3,281 risky apps. Among these reported apps, we successfully uncovered 718 malware samples (in 29 families) and 322 of them are zero-day (in 11 families). These results demonstrate the efficacy and scalability of RiskRanker to police Android markets of all stripes.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 10th {International} {Conference} on {Mobile} {Systems}, {Applications}, and {Services} ({MobiSys})},
	publisher = {ACM Press},
	author = {Grace, Michael and Zhou, Yajin and Zhang, Qiang and Zou, Shihong and Jiang, Xuxian},
	year = {2012},
	keywords = {detection},
	pages = {281--294},
	file = {Grace et al. - 2012 - RiskRanker scalable and accurate zero-day android.pdf:/home/fmind/Documents/Zotero/storage/3LDMRBJX/Grace et al. - 2012 - RiskRanker scalable and accurate zero-day android.pdf:application/pdf}
}

@inproceedings{deo_prescience:_2016,
	title = {Prescience: {Probabilistic} {Guidance} on the {Retraining} {Conundrum} for {Malware} {Detection}},
	isbn = {978-1-4503-4573-6},
	shorttitle = {Prescience},
	url = {http://dl.acm.org/citation.cfm?doid=2996758.2996769},
	doi = {10.1145/2996758.2996769},
	abstract = {Malware evolves perpetually and relies on increasingly sophisticated attacks to supersede defense strategies. Data-driven approaches to malware detection run the risk of becoming rapidly antiquated. Keeping pace with malware requires models that are periodically enriched with fresh knowledge, commonly known as retraining. In this work, we propose the use of Venn-Abers predictors for assessing the quality of binary classification tasks as a first step towards identifying antiquated models. One of the key benefits behind the use of Venn-Abers predictors is that they are automatically well calibrated and offer probabilistic guidance on the identification of nonstationary populations of malware. Our framework is agnostic to the underlying classification algorithm and can then be used for building better retraining strategies in the presence of concept drift. Results obtained over a timeline-based evaluation with about 90K samples show that our framework can identify when models tend to become obsolete.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2016 {ACM} {Workshop} on {Artificial} {Intelligence} and {Security} ({AISec})},
	publisher = {ACM Press},
	author = {Deo, Amit and Dash, Santanu Kumar and Suarez-Tangil, Guillermo and Vovk, Volodya and Cavallaro, Lorenzo},
	year = {2016},
	keywords = {detection, design},
	pages = {71--82},
	file = {Deo et al. - 2016 - Prescience Probabilistic Guidance on the Retraini.pdf:/home/fmind/Documents/Zotero/storage/2T8T7F4S/Deo et al. - 2016 - Prescience Probabilistic Guidance on the Retraini.pdf:application/pdf}
}

@inproceedings{portokalidis_paranoid_2010,
	title = {Paranoid {Android}: versatile protection for smartphones},
	isbn = {978-1-4503-0133-6},
	shorttitle = {Paranoid {Android}},
	url = {http://portal.acm.org/citation.cfm?doid=1920261.1920313},
	doi = {10.1145/1920261.1920313},
	abstract = {Smartphone usage has been continuously increasing in recent years. Moreover, smartphones are often used for privacy-sensitive tasks, becoming highly valuable targets for attackers. They are also quite different from PCs, so that PC-oriented solutions are not always applicable, or do not offer comprehensive security. We propose an alternative solution, where security checks are applied on remote security servers that host exact replicas of the phones in virtual environments. The servers are not subject to the same constraints, allowing us to apply multiple detection techniques simultaneously. We implemented a prototype of this security model for Android phones, and show that it is both practical and scalable: we generate no more than 2KiB/s and 64B/s of trace data for high-loads and idle operation respectively, and are able to support more than a hundred replicas running on a single server.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 26th {Annual} {Computer} {Security} {Applications} {Conference} ({ACSAC})},
	publisher = {ACM Press},
	author = {Portokalidis, Georgios and Homburg, Philip and Anagnostakis, Kostas and Bos, Herbert},
	year = {2010},
	keywords = {hardening},
	pages = {347--356},
	file = {Portokalidis et al. - 2010 - Paranoid Android versatile protection for smartph.pdf:/home/fmind/Documents/Zotero/storage/WYTDEK7X/Portokalidis et al. - 2010 - Paranoid Android versatile protection for smartph.pdf:application/pdf}
}

@techreport{fattori_reconstruction_2014,
	title = {On the {Reconstruction} of {Android} {Malware} {Behaviors}},
	abstract = {Today mobile devices and their application marketplaces drive the entire economy of the mobile landscape. For instance, Android platforms alone have produced staggering revenues exceeding 9 billion USD, which unfortunately attracts cybercriminals with malware now hitting the Android markets at an alarmingly rising pace.},
	language = {en},
	type = {Technical {Report}},
	institution = {Royal Holloway, University of London},
	author = {Fattori, Aristide and Tam, Kimberly and Khan, Salahuddin J. and Reina, Alessandro and Cavallaro, Lorenzo},
	year = {2014},
	keywords = {classification, dynamic},
	pagetotal = {14},
	file = {Fattori et al. - On the Reconstruction of Android Malware Behaviors.pdf:/home/fmind/Documents/Zotero/storage/6JNAT4BK/Fattori et al. - On the Reconstruction of Android Malware Behaviors.pdf:application/pdf}
}

@inproceedings{elish_need_2015,
	title = {On the {Need} of {Precise} {Inter}-{App} {ICC} {Classification} for {Detecting} {Android} {Malware} {Collusions}},
	abstract = {Malware collusion is a new threat against Android application security. It refers to the scenario where two or more applications interact with each other to perform malicious tasks. Most existing solutions assume the attack model of a standalone malicious application, and thus cannot detect collusion. The objective of this position paper is to point out the need for practical solutions for detecting malware collusion. We show experimental evidence on the technical challenges associated with classifying benign Android inter-component communication (ICC) flows from colluding ones. We statically construct ICC Maps to capture pairwise communicating ICC channels of 2,644 real benign apps. We find that existing permission-based collusion-detection policies trigger a large number of false alerts in benign apps pairs.},
	language = {en},
	booktitle = {Proceedings of the {IEEE} {Mobile} {Security} {Technologies} {Workshop} ({MoST})},
	author = {Elish, Karim O. and Yao, Danfeng and Ryder, Barbara G.},
	year = {2015},
	keywords = {characterization},
	pagetotal = {5},
	file = {Elish - On the Need of Precise Inter-App ICC Classiﬁcation.pdf:/home/fmind/Documents/Zotero/storage/UKR68PV3/Elish - On the Need of Precise Inter-App ICC Classiﬁcation.pdf:application/pdf}
}

@inproceedings{enck_lightweight_2009,
	title = {On lightweight mobile phone application certification},
	isbn = {978-1-60558-894-0},
	url = {http://portal.acm.org/citation.cfm?doid=1653662.1653691},
	doi = {10.1145/1653662.1653691},
	abstract = {Users have begun downloading an increasingly large number of mobile phone applications in response to advancements in handsets and wireless networks. The increased number of applications results in a greater chance of installing Trojans and similar malware. In this paper, we propose the Kirin security service for Android, which performs lightweight certification of applications to mitigate malware at install time. Kirin certification uses security rules, which are templates designed to conservatively match undesirable properties in security configuration bundled with applications. We use a variant of security requirements engineering techniques to perform an in-depth security analysis of Android to produce a set of rules that match malware characteristics. In a sample of 311 of the most popular applications downloaded from the official Android Market, Kirin and our rules found 5 applications that implement dangerous functionality and therefore should be installed with extreme caution. Upon close inspection, another five applications asserted dangerous rights, but were within the scope of reasonable functional needs. These results indicate that security configuration bundled with Android applications provides practical means of detecting malware.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 16th {ACM} {Conference} on {Computer} and {Communications} {Security}},
	publisher = {ACM Press},
	author = {Enck, William and Ongtang, Machigar and McDaniel, Patrick},
	year = {2009},
	keywords = {detection},
	pages = {235--245},
	file = {Enck et al. - 2009 - On lightweight mobile phone application certificat.pdf:/home/fmind/Documents/Zotero/storage/L3WPRKR9/Enck et al. - 2009 - On lightweight mobile phone application certificat.pdf:application/pdf}
}

@inproceedings{spreitzenbarth_mobile-sandbox:_2013,
	title = {Mobile-sandbox: having a deeper look into {Android} applications},
	isbn = {978-1-4503-1656-9},
	shorttitle = {Mobile-sandbox},
	url = {http://dl.acm.org/citation.cfm?doid=2480362.2480701},
	doi = {10.1145/2480362.2480701},
	abstract = {Smartphones in general and Android in particular are increasingly shifting into the focus of cybercriminals. For understanding the threat to security and privacy it is important for security researchers to analyze malicious software written for these systems. The exploding number of Android malware calls for automation in the analysis. In this paper, we present Mobile-Sandbox, a system designed to automatically analyze Android applications in two novel ways: (1) it combines static and dynamic analysis, i.e., results of static analysis are used to guide dynamic analysis and extend coverage of executed code, and (2) it uses specific techniques to log calls to native (i.e., “non-Java”) APIs. We evaluated the system on more than 36,000 applications from Asian third-party mobile markets and found that 24\% of all applications actually use native calls in their code.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 28th {Annual} {ACM} {Symposium} on {Applied} {Computing}},
	publisher = {ACM Press},
	author = {Spreitzenbarth, Michael and Freiling, Felix and Echtler, Florian and Schreck, Thomas and Hoffmann, Johannes},
	year = {2013},
	keywords = {static, dynamic, characterization},
	pages = {1808--1815},
	file = {Spreitzenbarth et al. - 2013 - Mobile-sandbox having a deeper look into android .pdf:/home/fmind/Documents/Zotero/storage/N6MEEM4V/Spreitzenbarth et al. - 2013 - Mobile-sandbox having a deeper look into android .pdf:application/pdf}
}

@inproceedings{chakradeo_mast:_2013,
	title = {{MAST}: triage for market-scale mobile malware analysis},
	isbn = {978-1-4503-1998-0},
	shorttitle = {{MAST}},
	url = {http://dl.acm.org/citation.cfm?doid=2462096.2462100},
	doi = {10.1145/2462096.2462100},
	abstract = {Malware is a pressing concern for mobile application market operators. While current mitigation techniques are keeping pace with the relatively infrequent presence of malicious code, the rapidly increasing rate of application development makes manual and resource-intensive automated analysis costly at market-scale. To address this resource imbalance, we present the Mobile Application Security Triage (MAST) architecture, a tool that helps to direct scarce malware analysis resources towards the applications with the greatest potential to exhibit malicious behavior. MAST analyzes attributes extracted from just the application package using Multiple Correspondence Analysis (MCA), a statistical method that measures the correlation between multiple categorical (i.e., qualitative) data. We train MAST using over 15,000 applications from Google Play and a dataset of 732 known-malicious applications. We then use MAST to perform triage on three third-party markets of different size and malware composition—36,710 applications in total. Our experiments show that MAST is both effective and performant. Using MAST ordered ranking, malware-analysis tools can find 95\% of malware at the cost of analyzing 13\% of the non-malicious applications on average across multiple markets, and MAST triage processes markets in less than a quarter of the time required to perform signature detection. More importantly, we show that successful triage can dramatically reduce the costs of removing malicious applications from markets.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Sixth} {ACM} {Conference} on {Security} and {Privacy} in {Wireless} and {Mobile} {Networks}},
	publisher = {ACM Press},
	author = {Chakradeo, Saurabh and Reaves, Bradley and Traynor, Patrick and Enck, William},
	year = {2013},
	keywords = {detection, vt-features},
	pages = {13--24},
	file = {Chakradeo et al. - 2013 - MAST triage for market-scale mobile malware analy.pdf:/home/fmind/Documents/Zotero/storage/7E9GAWQE/Chakradeo et al. - 2013 - MAST triage for market-scale mobile malware analy.pdf:application/pdf}
}

@inproceedings{mariconti_mamadroid:_2017,
	title = {{MaMaDroid}: {Detecting} {Android} {Malware} by {Building} {Markov} {Chains} of {Behavioral} {Models}},
	isbn = {978-1-891562-46-4},
	shorttitle = {{MaMaDroid}},
	url = {https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/mamadroid-detecting-android-malware-building-markov-chains-behavioral-models/},
	doi = {10.14722/ndss.2017.23353},
	abstract = {The rise in popularity of the Android platform has resulted in an explosion of malware threats targeting it. As both Android malware and the operating system itself constantly evolve, it is very challenging to design robust malware mitigation techniques that can operate for long periods of time without the need for modifications or costly re-training. In this paper, we present MAMADROID, an Android malware detection system that relies on app behavior. MAMADROID builds a behavioral model, in the form of a Markov chain, from the sequence of abstracted API calls performed by an app, and uses it to extract features and perform classification. By abstracting calls to their packages or families, MAMADROID maintains resilience to API changes and keeps the feature set size manageable. We evaluate its accuracy on a dataset of 8.5K benign and 35.5K malicious apps collected over a period of six years, showing that it not only effectively detects malware (with up to 99\% F-measure), but also that the model built by the system keeps its detection capabilities for long periods of time (on average, 87\% and 73\% F-measure, respectively, one and two years after training). Finally, we compare against DROIDAPIMINER, a state-of-the-art system that relies on the frequency of API calls performed by apps, showing that MAMADROID significantly outperforms it.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2017 {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Mariconti, Enrico and Onwuzurike, Lucky and Andriotis, Panagiotis and De Cristofaro, Emiliano and Ross, Gordon and Stringhini, Gianluca},
	year = {2017},
	keywords = {detection, static},
	file = {Mariconti et al. - 2017 - MaMaDroid Detecting Android Malware by Building M.pdf:/home/fmind/Documents/Zotero/storage/YRQ7USSP/Mariconti et al. - 2017 - MaMaDroid Detecting Android Malware by Building M.pdf:application/pdf}
}

@article{saracino_madam:_2016,
	title = {{MADAM}: {Effective} and {Efficient} {Behavior}-based {Android} {Malware} {Detection} and {Prevention}},
	doi = {10.1109/TDSC.2016.2536605},
	abstract = {Android users are constantly threatened by an increasing number of malicious applications (apps), generically called malware. Malware constitutes a serious threat to user privacy, money, device and file integrity. In this paper we note that, by studying their actions, we can classify malware into a small number of behavioral classes, each of which performs a limited set of misbehaviors that characterize them. These misbehaviors can be defined by monitoring features belonging to different Android levels. In this paper we present MADAM, a novel host-based malware detection system for Android devices which simultaneously analyzes and correlates features at four levels: kernel, application, user and package, to detect and stop malicious behaviors. MADAM has been designed to take into account those behaviors characteristics of almost every real malware which can be found in the wild. MADAM detects and effectively blocks more than 96\% of malicious apps, which come from three large datasets with about 2,800 apps, by exploiting the cooperation of two parallel classifiers and a behavioral signature-based detector. Extensive experiments, which also includes the analysis of a testbed of 9,804 genuine apps, have been conducted to show the low false alarm rate, the negligible performance overhead and limited battery consumption.},
	language = {en},
	journal = {{IEEE} {Transactions} on {Dependable} and {Secure} {Computing}},
	author = {Saracino, Andrea and Sgandurra, Daniele and Dini, Gianluca and Martinelli, Fabio},
	year = {2016},
	keywords = {detection},
	pagetotal = {14},
	file = {Saracino et al. - MADAM Effective and Efﬁcient Behavior-based Andro.pdf:/home/fmind/Documents/Zotero/storage/8STIWQLW/Saracino et al. - MADAM Effective and Efﬁcient Behavior-based Andro.pdf:application/pdf}
}

@inproceedings{zhang_life_2016,
	title = {Life after {App} {Uninstallation}: {Are} the {Data} {Still} {Alive}? {Data} {Residue} {Attacks} on {Android}},
	isbn = {978-1-891562-41-9},
	shorttitle = {Life after {App} {Uninstallation}},
	url = {https://www.ndss-symposium.org/wp-content/uploads/sites/25/2017/09/life-after-app-installation-data-still-alive-data-residue-attacks-android.pdf},
	doi = {10.14722/ndss.2016.23061},
	abstract = {Uninstalling apps from mobile devices is among the most common user practices on smartphones. It may sound trivial, but the entire process involves multiple system components coordinating to remove the data belonging to the uninstalled app. Despite its frequency and complexity, little has been done to understand the security risks in the app’s uninstallation process. In this project, we have conducted the first systematic analysis of Android’s data cleanup mechanism during the app’s uninstallation process. Our analysis reveals that data residues are pervasive in the system after apps are uninstalled. For each identified data residue instance, we have formulated hypotheses and designed experiments to see whether it can be exploited to compromise the system security. The results are surprising: we have found 12 instances of vulnerabilities caused by data residues. By exploiting them, adversaries can steal user’s online-account credentials, access other app’s private data, escalate privileges, eavesdrop on user’s keystrokes, etc. We call these attacks the data residue attacks.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2016 {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Zhang, Xiao and Ying, Kailiang and Aafer, Yousra and Qiu, Zhenshen and Du, Wenliang},
	year = {2016},
	keywords = {study},
	file = {Zhang et al. - 2016 - Life after App Uninstallation Are the Data Still .pdf:/home/fmind/Documents/Zotero/storage/U2W2RINT/Zhang et al. - 2016 - Life after App Uninstallation Are the Data Still .pdf:application/pdf}
}

@incollection{hutchison_juxtapp:_2013,
	address = {Berlin, Heidelberg},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {{Juxtapp}: {A} {Scalable} {System} for {Detecting} {Code} {Reuse} among {Android} {Applications}},
	volume = {7591},
	isbn = {978-3-642-37299-5 978-3-642-37300-8},
	shorttitle = {{Juxtapp}},
	url = {http://link.springer.com/10.1007/978-3-642-37300-8_4},
	abstract = {Mobile application markets such as the Android Marketplace and the Amazon Android store provide a centralized showcase of applications that end users can purchase or download for free onto their mobile phones. Despite the influx of applications to the markets, applications are either largely unreviewed or only cursorily reviewed by marketplace maintainers due to the vast number of submissions; furthermore, they rely on user policing and reporting to detect misbehaving applications. This reactive approach to application security, especially when programs can contain bugs, malware, or pirated (inauthentic) code, puts too much responsibility on the end users.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Detection of {Intrusions} and {Malware}, and {Vulnerability} {Assessment}},
	publisher = {Springer Berlin Heidelberg},
	author = {Hanna, Steve and Huang, Ling and Wu, Edward and Li, Saung and Chen, Charles and Song, Dawn},
	editor = {Flegel, Ulrich and Markatos, Evangelos and Robertson, William},
	year = {2013},
	doi = {10.1007/978-3-642-37300-8_4},
	keywords = {indexing, vt-features},
	pages = {62--81},
	file = {Hanna et al. - 2013 - Juxtapp A Scalable System for Detecting Code Reus.pdf:/home/fmind/Documents/Zotero/storage/6KUPK2LR/Hanna et al. - 2013 - Juxtapp A Scalable System for Detecting Code Reus.pdf:application/pdf}
}

@article{shabtai_intrusion_2010,
	title = {Intrusion detection for mobile devices using the knowledge-based, temporal abstraction method},
	volume = {83},
	issn = {0164-1212},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0164121210000762},
	doi = {10.1016/j.jss.2010.03.046},
	language = {en},
	number = {8},
	urldate = {2018-04-10},
	journal = {Journal of Systems and Software},
	author = {Shabtai, Asaf and Kanonov, Uri and Elovici, Yuval},
	month = aug,
	year = {2010},
	keywords = {detection},
	pages = {1524--1537},
	file = {Shabtai et al. - 2010 - Intrusion detection for mobile devices using the k.pdf:/home/fmind/Documents/Zotero/storage/PRVD8VUB/Shabtai et al. - 2010 - Intrusion detection for mobile devices using the k.pdf:application/pdf}
}

@inproceedings{wang_hey_2012,
	title = {Hey, {You}, {Get} {Off} of {My} {Market}: {Detecting} {Malicious} {Apps} in {Official} and {Alternative} {Android} {Markets}},
	abstract = {In this paper, we present a systematic study for the detection of malicious applications (or apps) on popular Android Markets. To this end, we first propose a permission-based behavioral footprinting scheme to detect new samples of known Android malware families. Then we apply a heuristics-based filtering scheme to identify certain inherent behaviors of unknown malicious families. We implemented both schemes in a system called DroidRanger. The experiments with 204,040 apps collected from five different Android Markets in May-June 2011 reveal 211 malicious ones: 32 from the official Android Market (0.02\% infection rate) and 179 from alternative marketplaces (infection rates ranging from 0.20\% to 0.47\%). Among those malicious apps, our system also uncovered two zero-day malware (in 40 apps): one from the official Android Market and the other from alternative marketplaces. The results show that current marketplaces are functional and relatively healthy. However, there is also a clear need for a rigorous policing process, especially for non-regulated alternative marketplaces.},
	language = {en},
	booktitle = {Proceedings of the 19th {Annual} {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Zhou, Yajin and Wang, Zhi and Zhou, Wu and Jiang, Xuxian},
	year = {2012},
	keywords = {detection, static, permissions},
	pagetotal = {13},
	file = {Wang and Jiang - Hey, You, Get Off of My Market Detecting Maliciou.pdf:/home/fmind/Documents/Zotero/storage/GH89USBZ/Wang and Jiang - Hey, You, Get Off of My Market Detecting Maliciou.pdf:application/pdf}
}

@inproceedings{rasthofer_harvesting_2016,
	title = {Harvesting {Runtime} {Values} in {Android} {Applications} {That} {Feature} {Anti}-{Analysis} {Techniques}},
	isbn = {978-1-891562-41-9},
	url = {https://www.ndss-symposium.org/wp-content/uploads/sites/25/2017/09/harvesting-runtime-values-android-applications-feature-anti-analysis-techniques.pdf},
	doi = {10.14722/ndss.2016.23066},
	abstract = {It is generally challenging to tell apart malware from benign applications. To make this decision, human analysts are frequently interested in runtime values: targets of reflective method calls, URLs to which data is sent, target telephone numbers of SMS messages, and many more. However, obfuscation and string encryption, used by malware as well as goodware, often not only render human inspections, but also static analyses ineffective. In addition, malware frequently tricks dynamic analyses by detecting the execution environment emulated by the analysis tool and then refraining from malicious behavior.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2016 {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Rasthofer, Siegfried and Arzt, Steven and Miltenberger, Marc and Bodden, Eric},
	year = {2016},
	keywords = {dynamic, characterization, obfuscation},
	file = {Rasthofer et al. - 2016 - Harvesting Runtime Values in Android Applications .pdf:/home/fmind/Documents/Zotero/storage/5ADZGMC9/Rasthofer et al. - 2016 - Harvesting Runtime Values in Android Applications .pdf:application/pdf}
}

@inproceedings{afonso_going_2016,
	title = {Going {Native}: {Using} a {Large}-{Scale} {Analysis} of {Android} {Apps} to {Create} a {Practical} {Native}-{Code} {Sandboxing} {Policy}},
	isbn = {978-1-891562-41-9},
	shorttitle = {Going {Native}},
	url = {https://www.ndss-symposium.org/wp-content/uploads/sites/25/2017/09/going-native-large-scale-analysis-android-apps-practical-native-code-sandboxing-policy.pdf},
	doi = {10.14722/ndss.2016.23384},
	abstract = {Current static analysis techniques for Android applications operate at the Java level—that is, they analyze either the Java source code or the Dalvik bytecode. However, Android allows developers to write code in C or C++ that is cross-compiled to multiple binary architectures. Furthermore, the Java-written components and the native code components (C or C++) can interact.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 2016 {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Afonso, Vitor and Bianchi, Antonio and Fratantonio, Yanick and Doupé, Adam and Polino, Mario and de Geus, Paulo and Kruegel, Christopher and Vigna, Giovanni},
	year = {2016},
	keywords = {static, study},
	file = {Afonso et al. - 2016 - Going Native Using a Large-Scale Analysis of Andr.pdf:/home/fmind/Documents/Zotero/storage/K3IMJL6M/Afonso et al. - 2016 - Going Native Using a Large-Scale Analysis of Andr.pdf:application/pdf}
}

@inproceedings{chen_finding_2015,
	title = {Finding {Unknown} {Malice} in 10 {Seconds}: {Mass} {Vetting} for {New} {Threats} at the {Google}-{Play} {Scale}},
	abstract = {An app market’s vetting process is expected to be scalable and effective. However, today’s vetting mechanisms are slow and less capable of catching new threats. In our research, we found that a more powerful solution can be found by exploiting the way Android malware is constructed and disseminated, which is typically through repackaging legitimate apps with similar malicious components. As a result, such attack payloads often stand out from those of the same repackaging origin and also show up in the apps not supposed to relate to each other.},
	language = {en},
	booktitle = {Proceedings of the 24th {USENIX} {Security} {Symposium}},
	publisher = {USENIX Association},
	author = {Chen, Kai and Wang, Peng and Lee, Yeonjoon and Wang, XiaoFeng and Zhang, Nan and Huang, Heqing and Zou, Wei and Liu, Peng},
	year = {2015},
	keywords = {detection, indexing},
	pages = {659--674},
	file = {Chen et al. - Finding Unknown Malice in 10 Seconds Mass Vetting.pdf:/home/fmind/Documents/Zotero/storage/SXIAYGMM/Chen et al. - Finding Unknown Malice in 10 Seconds Mass Vetting.pdf:application/pdf}
}

@inproceedings{zhou_fast_2013,
	title = {Fast, scalable detection of ``{Piggybacked}'' mobile applications},
	isbn = {978-1-4503-1890-7},
	url = {http://dl.acm.org/citation.cfm?doid=2435349.2435377},
	doi = {10.1145/2435349.2435377},
	abstract = {Mobile applications (or apps) are rapidly growing in number and variety. These apps provide useful features, but also bring certain privacy and security risks. For example, malicious authors may attach destructive payloads to legitimate apps to create so-called “piggybacked” apps and advertise them in various app markets to infect unsuspecting users. To detect them, existing approaches typically employ pair-wise comparison, which unfortunately has limited scalability. In this paper, we present a fast and scalable approach to detect these apps in existing Android markets. Based on the fact that the attached payload is not an integral part of a given app’s primary functionality, we propose a module decoupling technique to partition an app’s code into primary and non-primary modules. Also, noticing that piggybacked apps share the same primary modules as the original apps, we develop a feature fingerprint technique to extract various semantic features (from primary modules) and convert them into feature vectors. We then construct a metric space and propose a linearithmic search algorithm (with O(n log n) time complexity) to efficiently and scalably detect piggybacked apps. We have implemented a prototype and used it to study 84,767 apps collected from various Android markets in 2011. Our results show that the processing of these apps takes less than nine hours on a single machine. In addition, among these markets, piggybacked apps range from 0.97\% to 2.7\% (the official Android Market has 1\%). Further investigation shows that they are mainly used to steal ad revenue from the original developers and implant malicious payloads (e.g., for remote bot control). These results demonstrate the effectiveness and scalability of our approach.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Third} {ACM} {Conference} on {Data} and {Application} {Security} and {Privacy}},
	publisher = {ACM Press},
	author = {Zhou, Wu and Zhou, Yajin and Grace, Michael and Jiang, Xuxian and Zou, Shihong},
	year = {2013},
	keywords = {detection, piggybacking, vt-features},
	pages = {185--196},
	file = {Zhou et al. - 2013 - Fast, scalable detection of Piggybacked mobile a.pdf:/home/fmind/Documents/Zotero/storage/8WKL7LE6/Zhou et al. - 2013 - Fast, scalable detection of Piggybacked mobile a.pdf:application/pdf}
}

@inproceedings{roy_experimental_2015,
	title = {Experimental {Study} with {Real}-world {Data} for {Android} {App} {Security} {Analysis} using {Machine} {Learning}},
	isbn = {978-1-4503-3682-6},
	url = {http://dl.acm.org/citation.cfm?doid=2818000.2818038},
	doi = {10.1145/2818000.2818038},
	abstract = {Although Machine Learning (ML) based approaches have shown promise for Android malware detection, a set of critical challenges remain unaddressed. Some of those challenges arise in relation to proper evaluation of the detection approach while others are related to the design decisions of the same. In this paper, we systematically study the impact of these challenges as a set of research questions (i.e., hypotheses). We design an experimentation framework where we can reliably vary several parameters while evaluating ML-based Android malware detection approaches. The results from the experiments are then used to answer the research questions. Meanwhile, we also demonstrate the impact of some challenges on some existing ML-based approaches. The large (market-scale) dataset (benign and malicious apps) we use in the above experiments represents the real-world Android app security analysis scale. We envision this study to encourage the practice of employing a better evaluation strategy and better designs of future ML-based approaches for Android malware detection.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 31st {Annual} {Computer} {Security} {Applications} {Conference}},
	publisher = {ACM Press},
	author = {Roy, Sankardas and DeLoach, Jordan and Li, Yuping and Herndon, Nic and Caragea, Doina and Ou, Xinming and Ranganath, Venkatesh Prasad and Li, Hongmin and Guevara, Nicolais},
	year = {2015},
	keywords = {detection, study},
	pages = {81--90},
	file = {Roy et al. - 2015 - Experimental Study with Real-world Data for Androi.pdf:/home/fmind/Documents/Zotero/storage/VFQ2AH5J/Roy et al. - 2015 - Experimental Study with Real-world Data for Androi.pdf:application/pdf}
}

@inproceedings{yan_droidscope:_2012,
	title = {{DroidScope}: {Seamlessly} {Reconstructing} the {OS} and {Dalvik} {Semantic} {Views} for {Dynamic} {Android} {Malware} {Analysis}},
	abstract = {The prevalence of mobile platforms, the large market share of Android, plus the openness of the Android Market makes it a hot target for malware attacks. Once a malware sample has been identified, it is critical to quickly reveal its malicious intent and inner workings. In this paper we present DroidScope, an Android analysis platform that continues the tradition of virtualization-based malware analysis. Unlike current desktop malware analysis platforms, DroidScope reconstructs both the OS-level and Java-level semantics simultaneously and seamlessly. To facilitate custom analysis, DroidScope exports three tiered APIs that mirror the three levels of an Android device: hardware, OS and Dalvik Virtual Machine. On top of DroidScope, we further developed several analysis tools to collect detailed native and Dalvik instruction traces, profile API-level activity, and track information leakage through both the Java and native components using taint analysis. These tools have proven to be effective in analyzing real world malware samples and incur reasonably low performance overheads.},
	language = {en},
	booktitle = {Proceedings of the 21st {USENIX} {Security} {Symposium}},
	publisher = {USENIX Association},
	author = {Yan, Lok Kwong and Yin, Heng},
	year = {2012},
	keywords = {dynamic, characterization, leaks},
	pages = {569--584},
	file = {Yan and Yin - DroidScope Seamlessly Reconstructing the OS and D.pdf:/home/fmind/Documents/Zotero/storage/F763J8QY/Yan and Yin - DroidScope Seamlessly Reconstructing the OS and D.pdf:application/pdf}
}

@misc{alam_droidnative:_2016,
	title = {{DroidNative}: {Semantic}-{Based} {Detection} of {Android} {Native} {Code} {Malware}},
	url = {https://arxiv.org/abs/1602.04693},
	abstract = {According to the Symantec and F-Secure threat reports, mobile malware development in 2013 and 2014 has continued to focus almost exclusively (∼99\%) on the Android platform. Malware writers are applying stealthy mutations (obfuscations) to create malware variants, thwarting detection by signature based detectors. In addition, the plethora of more sophisticated detectors making use of static analysis techniques to detect such variants operate only at the bytecode level, meaning that malware embedded in native code goes undetected. A recent study shows that 86\% of the most popular Android applications contain native code, making this a plausible threat. This paper proposes DroidNative, an Android malware detector that uses specific control flow patterns to reduce the effect of obfuscations, provides automation and platform independence, and as far as we know is the first system that operates at the Android native code level, allowing it to detect malware embedded in both native code and bytecode. When tested with traditional malware variants it achieves a detection rate (DR) of 99.48\%, compared to academic and commercial tools’ DRs that range from 8.33\% –93.22\%. When tested with a dataset of 2240 samples DroidNative achieves a DR of 99.16\%, a false positive rate of 1.3\% and an average detection time of 26.87 sec/sample.},
	language = {en},
	author = {Alam, Shahid and Qu, Zhengyang and Riley, Ryan and Chen, Yan and Rastogi, Vaibhav},
	month = feb,
	year = {2016},
	eprint = {1602.04693},
	eprinttype = {arXiv},
	eprintclass = {cs.CR},
	keywords = {detection, static},
	pagetotal = {18},
	file = {Alam et al. - DroidNative Semantic-Based Detection of Android N.pdf:/home/fmind/Documents/Zotero/storage/RV4E6IVP/Alam et al. - DroidNative Semantic-Based Detection of Android N.pdf:application/pdf}
}

@incollection{kutylowski_droidminer:_2014,
	address = {Cham},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {{DroidMiner}: {Automated} {Mining} and {Characterization} of {Fine}-grained {Malicious} {Behaviors} in {Android} {Applications}},
	volume = {8712},
	isbn = {978-3-319-11202-2 978-3-319-11203-9},
	shorttitle = {{DroidMiner}},
	url = {http://link.springer.com/10.1007/978-3-319-11203-9_10},
	abstract = {Most existing malicious Android app detection approaches rely on manually selected detection heuristics, features, and models. In this paper, we describe a new, complementary system, called DroidMiner, which uses static analysis to automatically mine malicious program logic from known Android malware, abstracts this logic into a sequence of threat modalities, and then seeks out these threat modality patterns in other unknown (or newly published) Android apps. We formalize a two-level behavioral graph representation used to capture Android app program logic, and design new techniques to identify and label elements of the graph that capture malicious behavioral patterns (or malicious modalities). After the automatic learning of these malicious behavioral models, DroidMiner can scan a new Android app to (i) determine whether it contains malicious modalities, (ii) diagnose the malware family to which it is most closely associated, (iii) and provide further evidence as to why the app is considered to be malicious by including a concise description of identified malicious behaviors. We evaluate DroidMiner using 2,466 malicious apps, identified from a corpus of over 67,000 third-party market Android apps, plus an additional set of over 10,000 official market Android apps. Using this set of real-world apps, we demonstrate that DroidMiner achieves a 95.3\% detection rate, with only a 0.4\% false positive rate. We further evaluate DroidMiner’s ability to classify malicious apps under their proper family labels, and measure its label accuracy at 92\%.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Computer {Security} -- {ESORICS} 2014},
	publisher = {Springer International Publishing},
	author = {Yang, Chao and Xu, Zhaoyan and Gu, Guofei and Yegneswaran, Vinod and Porras, Phillip},
	editor = {Kutyłowski, Mirosław and Vaidya, Jaideep},
	year = {2014},
	doi = {10.1007/978-3-319-11203-9_10},
	keywords = {detection, static, characterization, vt-features},
	pages = {163--182},
	file = {Yang et al. - 2014 - DroidMiner Automated Mining and Characterization .pdf:/home/fmind/Documents/Zotero/storage/H7KAJD8A/Yang et al. - 2014 - DroidMiner Automated Mining and Characterization .pdf:application/pdf}
}

@inproceedings{wu_droidmat:_2012,
	title = {{DroidMat}: {Android} {Malware} {Detection} through {Manifest} and {API} {Calls} {Tracing}},
	isbn = {978-1-4673-2261-4 978-0-7695-4776-3},
	shorttitle = {{DroidMat}},
	url = {http://ieeexplore.ieee.org/document/6298136/},
	doi = {10.1109/AsiaJCIS.2012.18},
	abstract = {Recently, the threat of Android malware is spreading rapidly, especially those repackaged Android malware. Although understanding Android malware using dynamic analysis can provide a comprehensive view, it is still subjected to high cost in environment deployment and manual efforts in investigation. In this study, we propose a static feature-based mechanism to provide a static analyst paradigm for detecting the Android malware. The mechanism considers the static information including permissions, deployment of components, Intent messages passing and API calls for characterizing the Android applications behavior. In order to recognize different intentions of Android malware, different kinds of clustering algorithms can be applied to enhance the malware modeling capability.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2012 {Seventh} {Asia} {Joint} {Conference} on {Information} {Security}},
	publisher = {IEEE},
	author = {Wu, Dong-Jie and Mao, Ching-Hao and Wei, Te-En and Lee, Hahn-Ming and Wu, Kuo-Ping},
	month = aug,
	year = {2012},
	keywords = {detection, static},
	pages = {62--69},
	file = {Wu et al. - 2012 - DroidMat Android Malware Detection through Manife.pdf:/home/fmind/Documents/Zotero/storage/43WVKMWU/Wu et al. - 2012 - DroidMat Android Malware Detection through Manife.pdf:application/pdf}
}

@article{yuan_droiddetector:_2016,
	title = {{DroidDetector}: {Android} malware characterization and detection using deep learning},
	volume = {21},
	issn = {1007-0214},
	shorttitle = {{DroidDetector}},
	url = {http://ieeexplore.ieee.org/document/7399288/},
	doi = {10.1109/TST.2016.7399288},
	abstract = {Smartphones and mobile tablets are rapidly becoming indispensable in daily life. Android has been the most popular mobile operating system since 2012. However, owing to the open nature of Android, countless malwares are hidden in a large number of benign apps in Android markets that seriously threaten Android security. Deep learning is a new area of machine learning research that has gained increasing attention in artificial intelligence. In this study, we propose to associate the features from the static analysis with features from dynamic analysis of Android apps and characterize malware using deep learning techniques. We implement an online deep-learning-based Android malware detection engine (DroidDetector) that can automatically detect whether an app is a malware or not. With thousands of Android apps, we thoroughly test DroidDetector and perform an in-depth analysis on the features that deep learning essentially exploits to characterize malware. The results show that deep learning is suitable for characterizing Android malware and especially effective with the availability of more training data. DroidDetector can achieve 96.76\% detection accuracy, which outperforms traditional machine learning techniques. An evaluation of ten popular anti-virus softwares demonstrates the urgency of advancing our capabilities in Android malware detection.},
	language = {en},
	number = {1},
	urldate = {2018-04-10},
	journal = {Tsinghua Science and Technology},
	author = {Yuan, Zhenlong and Lu, Yongqiang and Xue, Yibo},
	month = feb,
	year = {2016},
	keywords = {detection, static, dynamic},
	pages = {114--123},
	file = {Yuan et al. - 2016 - Droiddetector android malware characterization an.pdf:/home/fmind/Documents/Zotero/storage/DHDDNMTR/Yuan et al. - 2016 - Droiddetector android malware characterization an.pdf:application/pdf}
}

@inproceedings{rastogi_droidchameleon:_2013,
	title = {{DroidChameleon}: evaluating {Android} anti-malware against transformation attacks},
	isbn = {978-1-4503-1767-2},
	shorttitle = {{DroidChameleon}},
	url = {http://dl.acm.org/citation.cfm?doid=2484313.2484355},
	doi = {10.1145/2484313.2484355},
	abstract = {Mobile malware threats (e.g., on Android) have recently become a real concern. In this paper, we evaluate the state-of-the-art commercial mobile anti-malware products for Android and test how resistant they are against various common obfuscation techniques (even with known malware). Such an evaluation is important for not only measuring the available defense against mobile malware threats but also proposing effective, next-generation solutions. We developed DroidChameleon, a systematic framework with various transformation techniques, and used it for our study. Our results on ten popular commercial anti-malware applications for Android are worrisome: none of these tools is resistant against common malware transformation techniques. Moreover, the transformations are simple in most cases and anti-malware tools make little effort to provide transformation-resilient detection. Finally, in the light of our results, we propose possible remedies for improving the current state of malware detection on mobile devices.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 8th {ACM} {SIGSAC} {Symposium} on {Information}, {Computer} and {Communications} {Security}},
	publisher = {ACM Press},
	author = {Rastogi, Vaibhav and Chen, Yan and Jiang, Xuxian},
	year = {2013},
	keywords = {study, obfuscation},
	pages = {329--334},
	file = {Rastogi et al. - 2013 - DroidChameleon evaluating Android anti-malware ag.pdf:/home/fmind/Documents/Zotero/storage/FNY8B9QV/Rastogi et al. - 2013 - DroidChameleon evaluating Android anti-malware ag.pdf:application/pdf}
}

@incollection{zia_droidapiminer:_2013,
	address = {Cham},
	series = {Lecture {Notes} of the {Institute} for {Computer} {Sciences}, {Social} {Informatics} and {Telecommunications} {Engineering}},
	title = {{DroidAPIMiner}: {Mining} {API}-{Level} {Features} for {Robust} {Malware} {Detection} in {Android}},
	volume = {127},
	isbn = {978-3-319-04282-4 978-3-319-04283-1},
	shorttitle = {{DroidAPIMiner}},
	url = {http://link.springer.com/10.1007/978-3-319-04283-1_6},
	abstract = {The increasing popularity of Android apps makes them the target of malware authors. To defend against this severe increase of Android malwares and help users make a better evaluation of apps at install time, several approaches have been proposed. However, most of these solutions suffer from some shortcomings; computationally expensive, not general or not robust enough. In this paper, we aim to mitigate Android malware installation through providing robust and lightweight classifiers. We have conducted a thorough analysis to extract relevant features to malware behavior captured at API level, and evaluated different classifiers using the generated feature set. Our results show that we are able to achieve an accuracy as high as 99\% and a false positive rate as low as 2.2\% using KNN classifier.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Security and {Privacy} in {Communication} {Networks}},
	publisher = {Springer International Publishing},
	author = {Aafer, Yousra and Du, Wenliang and Yin, Heng},
	editor = {Zia, Tanveer and Zomaya, Albert and Varadharajan, Vijay and Mao, Morley},
	year = {2013},
	doi = {10.1007/978-3-319-04283-1_6},
	keywords = {detection, static},
	pages = {86--103},
	file = {Aafer et al. - 2013 - DroidAPIMiner Mining API-Level Features for Robus.pdf:/home/fmind/Documents/Zotero/storage/EMU4RJZG/Aafer et al. - 2013 - DroidAPIMiner Mining API-Level Features for Robus.pdf:application/pdf}
}

@inproceedings{arp_drebin:_2014,
	title = {Drebin: {Effective} and {Explainable} {Detection} of {Android} {Malware} in {Your} {Pocket}},
	isbn = {978-1-891562-35-8},
	shorttitle = {Drebin},
	url = {https://www.ndss-symposium.org/ndss2014/programme/drebin-effective-and-explainable-detection-android-malware-your-pocket/},
	doi = {10.14722/ndss.2014.23247},
	abstract = {Malicious applications pose a threat to the security of the Android platform. The growing amount and diversity of these applications render conventional defenses largely ineffective and thus Android smartphones often remain unprotected from novel malware. In this paper, we propose DREBIN, a lightweight method for detection of Android malware that enables identifying malicious applications directly on the smartphone. As the limited resources impede monitoring applications at run-time, DREBIN performs a broad static analysis, gathering as many features of an application as possible. These features are embedded in a joint vector space, such that typical patterns indicative for malware can be automatically identified and used for explaining the decisions of our method. In an evaluation with 123,453 applications and 5,560 malware samples DREBIN outperforms several related approaches and detects 94\% of the malware with few false alarms, where the explanations provided for each detection reveal relevant properties of the detected malware. On five popular smartphones, the method requires 10 seconds for an analysis on average, rendering it suitable for checking downloaded applications directly on the device.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings 2014 {Network} and {Distributed} {System} {Security} {Symposium}},
	publisher = {Internet Society},
	author = {Arp, Daniel and Spreitzenbarth, Michael and Hübner, Malte and Gascon, Hugo and Rieck, Konrad},
	year = {2014},
	keywords = {detection, static, vt-features, vt-analysis, vt-labels, dataset},
	file = {Arp et al. - 2014 - Drebin Effective and Explainable Detection of And.pdf:/home/fmind/Documents/Zotero/storage/VACGUJ9L/Arp et al. - 2014 - Drebin Effective and Explainable Detection of And.pdf:application/pdf}
}

@misc{oberheide_dissecting_2012,
	title = {Dissecting the {Android} {Bouncer}},
	language = {en},
	author = {Oberheide, Jon and Miller, Charlie},
	howpublished = {Presentation at {SummerCon} 2012},
	year = {2012},
	keywords = {study, dynamic, dataset},
	pagetotal = {62},
	file = {Oberheide - 2012 - Dissecting the Android Bouncer.pdf:/home/fmind/Documents/Zotero/storage/34X2YVRC/Oberheide - 2012 - Dissecting the Android Bouncer.pdf:application/pdf}
}

@inproceedings{zhou_dissecting_2012,
	title = {Dissecting {Android} {Malware}: {Characterization} and {Evolution}},
	isbn = {978-1-4673-1244-8 978-0-7695-4681-0},
	shorttitle = {Dissecting {Android} {Malware}},
	url = {http://ieeexplore.ieee.org/document/6234407/},
	doi = {10.1109/SP.2012.16},
	abstract = {The popularity and adoption of smartphones has greatly stimulated the spread of mobile malware, especially on the popular platforms such as Android. In light of their rapid growth, there is a pressing need to develop effective solutions. However, our defense capability is largely constrained by the limited understanding of these emerging mobile malware and the lack of timely access to related samples.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2012 {IEEE} {Symposium} on {Security} and {Privacy}},
	publisher = {IEEE},
	author = {Zhou, Yajin and Jiang, Xuxian},
	month = may,
	year = {2012},
	keywords = {classification, vt-features, reverse, vt-analysis, vt-labels, dataset},
	pages = {95--109},
	file = {Zhou and Jiang - 2012 - Dissecting Android Malware Characterization and E.pdf:/home/fmind/Documents/Zotero/storage/TY4S74IM/Zhou and Jiang - 2012 - Dissecting Android Malware Characterization and E.pdf:application/pdf}
}

@incollection{pernul_dexhunter:_2015,
	address = {Cham},
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {{DexHunter}: {Toward} {Extracting} {Hidden} {Code} from {Packed} {Android} {Applications}},
	volume = {9327},
	isbn = {978-3-319-24176-0 978-3-319-24177-7},
	shorttitle = {{DexHunter}},
	url = {http://link.springer.com/10.1007/978-3-319-24177-7_15},
	abstract = {The rapid growth of mobile application (or simply app) economy provides lucrative and profitable targets for hackers. Among OWASP’s top ten mobile risks for 2014, the lack of binary protections makes it easy to reverse, modify, and repackage Android apps. Recently, a number of packing services have been proposed to protect Android apps by hiding the original executable file (i.e., dex file). However, little is known about their effectiveness and efficiency. In this paper, we perform the first systematic investigation on such services by answering two questions: (1) what are the major techniques used by these services and their effects on apps? (2) can the original dex file in a packed app be recovered? If yes, how? We not only reveal their techniques and evaluate their effects, but also propose and develop a novel system, named DexHunter, to extract dex files protected by these services. It is worth noting that DexHunter supports both the Dalvik virtual machine (DVM) and the new Android Runtime (ART). The experimental results show that DexHunter can extract dex files from packed apps effectively and efficiently.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Computer {Security} -- {ESORICS} 2015},
	publisher = {Springer International Publishing},
	author = {Zhang, Yueqian and Luo, Xiapu and Yin, Haoyang},
	editor = {Pernul, Günther and Ryan, Peter Y. A. and Weippl, Edgar},
	year = {2015},
	doi = {10.1007/978-3-319-24177-7_15},
	keywords = {static},
	pages = {293--311},
	file = {Zhang et al. - 2015 - DexHunter Toward Extracting Hidden Code from Pack:/home/fmind/Documents/Zotero/storage/A3K3JVC3/Zhang et al. - 2015 - DexHunter Toward Extracting Hidden Code from Pack:application/pdf}
}

@inproceedings{zhou_detecting_2012,
	title = {Detecting repackaged smartphone applications in third-party {Android} marketplaces},
	isbn = {978-1-4503-1091-8},
	url = {http://dl.acm.org/citation.cfm?doid=2133601.2133640},
	doi = {10.1145/2133601.2133640},
	abstract = {Recent years have witnessed incredible popularity and adoption of smartphones and mobile devices, which is accompanied by large amount and wide variety of feature-rich smartphone applications. These smartphone applications (or apps), typically organized in different application marketplaces, can be conveniently browsed by mobile users and then simply clicked to install on a variety of mobile devices. In practice, besides the official marketplaces from platform vendors (e.g., Google and Apple), a number of third-party alternative marketplaces have also been created to host thousands of apps (e.g., to meet regional or localization needs). To maintain and foster a hygienic smartphone app ecosystem, there is a need for each third-party marketplace to offer quality apps to mobile users.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Second} {ACM} {Conference} on {Data} and {Application} {Security} and {Privacy}},
	publisher = {ACM Press},
	author = {Zhou, Wu and Zhou, Yajin and Jiang, Xuxian and Ning, Peng},
	year = {2012},
	keywords = {detection, piggybacking, indexing, vt-features},
	pages = {317--326},
	file = {Zhou et al. - 2012 - Detecting repackaged smartphone applications in th.pdf:/home/fmind/Documents/Zotero/storage/Q39NJLCC/Zhou et al. - 2012 - Detecting repackaged smartphone applications in th.pdf:application/pdf}
}

@article{jang_detecting_2016,
	title = {Detecting and classifying method based on similarity matching of {Android} malware behavior with profile},
	volume = {5},
	issn = {2193-1801},
	url = {http://www.springerplus.com/content/5/1/273},
	doi = {10.1186/s40064-016-1861-x},
	abstract = {Mass-market mobile security threats have increased recently due to the growth of mobile technologies and the popularity of mobile devices. Accordingly, techniques have been introduced for identifying, classifying, and defending against mobile threats utilizing static, dynamic, on-device, and off-device techniques. Static techniques are easy to evade, while dynamic techniques are expensive. On-device techniques are evasion, while off-device techniques need being always online. To address some of those shortcomings, we introduce Andro-profiler, a hybrid behavior based analysis and classification system for mobile malware. Andro-profiler main goals are efficiency, scalability, and accuracy. For that, Andro-profiler classifies malware by exploiting the behavior profiling extracted from the integrated system logs including system calls. Andro-profiler executes a malicious application on an emulator in order to generate the integrated system logs, and creates human-readable behavior profiles by analyzing the integrated system logs. By comparing the behavior profile of malicious application with representative behavior profile for each malware family using a weighted similarity matching technique, Andro-profiler detects and classifies it into malware families. The experiment results demonstrate that Andro-profiler is scalable, performs well in detecting and classifying malware with accuracy greater than 98 \%, outperforms the existing state-of-the-art work, and is capable of identifying 0-day mobile malware samples.},
	language = {en},
	number = {1},
	urldate = {2018-04-10},
	journal = {SpringerPlus},
	author = {Jang, Jae-wook and Yun, Jaesung and Mohaisen, Aziz and Woo, Jiyoung and Kim, Huy Kang},
	month = dec,
	year = {2016},
	keywords = {classification, dynamic},
	pages = {273},
	file = {Jang et al. - 2016 - Detecting and classifying method based on similari.pdf:/home/fmind/Documents/Zotero/storage/46MAY5ZW/Jang et al. - 2016 - Detecting and classifying method based on similari.pdf:application/pdf}
}

@article{suarez-tangil_dendroid:_2014,
	title = {Dendroid: {A} text mining approach to analyzing and classifying code structures in {Android} malware families},
	volume = {41},
	issn = {0957-4174},
	shorttitle = {Dendroid},
	url = {http://linkinghub.elsevier.com/retrieve/pii/S0957417413006088},
	doi = {10.1016/j.eswa.2013.07.106},
	abstract = {The rapid proliferation of smartphones over the last few years has come hand in hand with and impressive growth in the number and sophistication of malicious apps targetting smartphone users. The availability of reuse-oriented development methodologies and automated malware production tools makes exceedingly easy to produce new specimens. As a result, market operators and malware analysts are increasingly overwhelmed by the amount of newly discovered samples that must be analyzed. This situation has stimulated research in intelligent instruments to automate parts of the malware analysis process. In this paper, we introduce Dendroid, a system based on text mining and information retrieval techniques for this task. Our approach is motivated by a statistical analysis of the code structures found in a dataset of Android OS malware families, which reveals some parallelisms with classical problems in those domains. We then adapt the standard Vector Space Model and reformulate the modelling process followed in text mining applications. This enables us to measure similarity between malware samples, which is then used to automatically classify them into families. We also investigate the application of hierarchical clustering over the feature vectors obtained for each malware family. The resulting dendograms resemble the so-called phylogenetic trees for biological species, allowing us to conjecture about evolutionary relationships among families. Our experimental results suggest that the approach is remarkably accurate and deals efficiently with large databases of malware instances.},
	language = {en},
	number = {4},
	urldate = {2018-04-10},
	journal = {Expert Systems with Applications},
	author = {Suarez-Tangil, Guillermo and Tapiador, Juan E. and Peris-Lopez, Pedro and Blasco, Jorge},
	month = mar,
	year = {2014},
	keywords = {static, classification},
	pages = {1104--1117},
	file = {Suarez-Tangil et al. - 2014 - Dendroid A text mining approach to analyzing and .pdf:/home/fmind/Documents/Zotero/storage/472XAKKH/Suarez-Tangil et al. - 2014 - Dendroid A text mining approach to analyzing and .pdf:application/pdf}
}

@inproceedings{burguera_crowdroid:_2011,
	title = {Crowdroid: behavior-based malware detection system for {Android}},
	isbn = {978-1-4503-1000-0},
	shorttitle = {Crowdroid},
	url = {http://dl.acm.org/citation.cfm?doid=2046614.2046619},
	doi = {10.1145/2046614.2046619},
	abstract = {The sharp increase in the number of smartphones on the market, with the Android platform posed to becoming a market leader makes the need for malware analysis on this platform an urgent issue.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 1st {ACM} {Workshop} on {Security} and {Privacy} in {Smartphones} and {Mobile} {Devices}},
	publisher = {ACM Press},
	author = {Burguera, Iker and Zurutuza, Urko and Nadjm-Tehrani, Simin},
	year = {2011},
	keywords = {detection, dynamic},
	pages = {15--26},
	file = {Burguera et al. - 2011 - Crowdroid behavior-based malware detection system.pdf:/home/fmind/Documents/Zotero/storage/Z6MI9JS4/Burguera et al. - 2011 - Crowdroid behavior-based malware detection system.pdf:application/pdf}
}

@article{ramteke_comparative_2014,
	author = {Ramteke, Minakshi and Sen, Praveen and Sapate, Suchit},
	title = {Comparative {Study} and a {Survey} on {Malware} {Analysis} {Approaches} for {Android} {Devices}},
	journal = {International Journal of Advanced Research in Computer Science and Software Engineering},
	year = {2014},
	pages = {7},
	language = {en},
	keywords = {static, study, dynamic, characterization},
	abstract = {Android is one of the fast growing technologies used in mobile devices, Smart phone and tablet PC. As the smart phone usage increase the malicious attacks are also increases. So the many research going on mobile security. Malware analysis is one key aspect to protect the devices from malicious attack. Here we give the overview of malware analysis approaches for protecting our devices from malicious attacks. Malware is one of the threat models for mobile devices which access the mobile devices for the purpose of damaging the devices, tracing the secure information etc. Now a day malwares are very drastically evolve due to this, it is very difficult to delete it. Sandbox is a frame work for Android platform used for malware analysis and detection. In this paper we are mostly focusing on different types of existing sandbox approaches or tool to protecting mobile devices. Here we are comparing the malware analysis tools or approaches on the basis of various characteristics which presents the guidelines for selecting the most appropriate approach as per the needs.},
	file = {Ramteke et al. - 2014 - Comparative Study and a Survey on Malware Analysis.pdf:/home/fmind/Documents/Zotero/storage/W85LCJN9/Ramteke et al. - 2014 - Comparative Study and a Survey on Malware Analysis.pdf:application/pdf}
}

@inproceedings{heuser_asm:_2014,
	title = {{ASM}: {A} {Programmable} {Interface} for {Extending} {Android} {Security}},
	abstract = {Android, iOS, and Windows 8 are changing the application architecture of consumer operating systems. These new architectures required OS designers to rethink security and access control. While the new security architectures improve on traditional desktop and server OS designs, they lack sufficient protection semantics for different classes of OS customers (e.g., consumer, enterprise, and government). The Android OS in particular has seen over a dozen research proposals for security enhancements. This paper seeks to promote OS security extensibility in the Android OS. We propose the Android Security Modules (ASM) framework, which provides a programmable interface for defining new reference monitors for Android. We drive the ASM design by studying the authorization hook requirements of recent security enhancement proposals and identify that new OSes such as Android require new types of authorization hooks (e.g., replacing data). We describe the design and implementation of ASM and demonstrate its utility by developing reference monitors called ASM apps. Finally, ASM is not only beneficial for security researchers. If adopted by Google, we envision ASM enabling in-the-field security enhancement of Android devices without requiring root access, a significant limitation of existing bring-your-own-device solutions.},
	language = {en},
	booktitle = {23rd {USENIX} {Security} {Symposium} ({USENIX} {Security} 14)},
	publisher = {USENIX Association},
	author = {Heuser, Stephan and Nadkarni, Adwait and Enck, William and Sadeghi, Ahmad-Reza},
	month = aug,
	year = {2014},
	keywords = {hardening},
	pages = {1005--1019},
	file = {Heuser et al. - ASM A Programmable Interface for Extending Androi.pdf:/home/fmind/Documents/Zotero/storage/EZHTWURP/Heuser et al. - ASM A Programmable Interface for Extending Androi.pdf:application/pdf}
}

@inproceedings{rastogi_are_2016,
	title = {Are these {Ads} {Safe}: {Detecting} {Hidden} {Attacks} through the {Mobile} {App}-{Web} {Interfaces}},
	isbn = {978-1-891562-41-9},
	shorttitle = {Are these {Ads} {Safe}},
	url = {https://www.ndss-symposium.org/wp-content/uploads/sites/25/2017/09/ads-safe-detecting-hidden-attacks-through-mobile-app-web-interfaces.pdf},
	doi = {10.14722/ndss.2016.23234},
	abstract = {Mobile users are increasingly becoming targets of malware infections and scams. Some platforms, such as Android, are more open than others and are therefore easier to exploit than other platforms. In order to curb such attacks it is important to know how these attacks originate. We take a previously unexplored step in this direction and look for the answer at the interface between mobile apps and the Web. Numerous in-app advertisements work at this interface: when the user taps on an advertisement, she is led to a web page which may further redirect until the user reaches the final destination. Similarly, applications also embed web links that again lead to the outside Web. Even though the original application may not be malicious, the Web destinations that the user visits could play an important role in propagating attacks.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings 2016 {Network} and {Distributed} {System} {Security} {Symposium}},
	publisher = {Internet Society},
	author = {Rastogi, Vaibhav and Shao, Rui and Chen, Yan and Pan, Xiang and Zou, Shihong and Riley, Ryan},
	year = {2016},
	keywords = {study, advertisements},
	file = {Rastogi et al. - 2016 - Are these Ads Safe Detecting Hidden Attacks throu.pdf:/home/fmind/Documents/Zotero/storage/HV7N8TSK/Rastogi et al. - 2016 - Are these Ads Safe Detecting Hidden Attacks throu.pdf:application/pdf}
}

@inproceedings{rastogi_appsplayground:_2013,
	title = {{AppsPlayground}: automatic security analysis of smartphone applications},
	isbn = {978-1-4503-1890-7},
	shorttitle = {{AppsPlayground}},
	url = {http://dl.acm.org/citation.cfm?doid=2435349.2435379},
	doi = {10.1145/2435349.2435379},
	abstract = {Today’s smartphone application markets host an ever increasing number of applications. The sheer number of applications makes their review a daunting task. We propose AppsPlayground for Android, a framework that automates the analysis smartphone applications. AppsPlayground integrates multiple components comprising different detection and automatic exploration techniques for this purpose. We evaluated the system using multiple large scale and small scale experiments involving real benign and malicious applications. Our evaluation shows that AppsPlayground is quite effective at automatically detecting privacy leaks and malicious functionality in applications.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the {Third} {ACM} {Conference} on {Data} and {Application} {Security} and {Privacy}},
	publisher = {ACM Press},
	author = {Rastogi, Vaibhav and Chen, Yan and Enck, William},
	year = {2013},
	keywords = {dynamic, characterization, leaks},
	pages = {209--220},
	file = {Rastogi et al. - 2013 - AppsPlayground automatic security analysis of sma.pdf:/home/fmind/Documents/Zotero/storage/TY7LSMI2/Rastogi et al. - 2013 - AppsPlayground automatic security analysis of sma.pdf:application/pdf}
}

@inproceedings{lindorfer_andrubis_2014,
	title = {{ANDRUBIS} -- 1,000,000 {Apps} {Later}: {A} {View} on {Current} {Android} {Malware} {Behaviors}},
	isbn = {978-1-4799-8308-7 978-1-4799-8309-4},
	shorttitle = {{ANDRUBIS} -- 1,000,000 {Apps} {Later}},
	url = {http://ieeexplore.ieee.org/document/7446031/},
	doi = {10.1109/BADGERS.2014.7},
	abstract = {Android is the most popular smartphone operating system with a market share of 80\%, but as a consequence, also the platform most targeted by malware. To deal with the increasing number of malicious Android apps in the wild, malware analysts typically rely on analysis tools to extract characteristic information about an app in an automated fashion. While the importance of such tools has been addressed by the research community, the resulting prototypes remain limited in terms of analysis capabilities and availability.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2014 {Third} {International} {Workshop} on {Building} {Analysis} {Datasets} and {Gathering} {Experience} {Returns} for {Security} ({BADGERS})},
	publisher = {IEEE},
	author = {Lindorfer, Martina and Neugschwandtner, Matthias and Weichselbaum, Lukas and Fratantonio, Yanick and van der Veen, Victor and Platzer, Christian},
	month = sep,
	year = {2014},
	keywords = {static, study, dynamic, characterization},
	pages = {3--17},
	file = {Lindorfer et al. - 2014 - ANDRUBIS -- 1,000,000 Apps Later A View on Curren.pdf:/home/fmind/Documents/Zotero/storage/6WCIP2YW/Lindorfer et al. - 2014 - ANDRUBIS -- 1,000,000 Apps Later A View on Curren.pdf:application/pdf}
}

@techreport{weichselbaum_andrubis:_2014,
	title = {{ANDRUBIS}: {Android} {Malware} {Under} {The} {Magnifying} {Glass}},
	abstract = {The smartphone industry has been one of the fastest growing technological areas in recent years. Naturally, the considerable market share of the Android OS and the diversity of app distribution channels besides the official Google Play Store has attracted the attention of malware authors. To deal with the increasing numbers of malicious Android apps in the wild, malware analysts typically rely on analysis tools to extract characteristic information about an app in an automated fashion. While the importance of such tools has been addressed by the research community [8], [25], [26], [28], the resulting prototypes remain limited in terms of analysis capabilities and availability. In this paper we present ANDRUBIS, a completely automated, publicly available and comprehensive analysis system for Android applications. ANDRUBIS combines static analysis techniques with dynamic analysis on both Dalvik VM and system level, as well as several stimulation techniques to increase code coverage.},
	language = {en},
	number = {TR-ISECLAB-0414-001},
	institution = {Vienna University of Technology},
	author = {Weichselbaum, Lukas and Neugschwandtner, Matthias and Lindorfer, Martina and Fratantonio, Yanick and van der Veen, Victor and Platzer, Christian},
	year = {2014},
	keywords = {static, dynamic, characterization},
	pagetotal = {10},
	file = {Weichselbaum et al. - Android Malware Under The Magnifying Glass.pdf:/home/fmind/Documents/Zotero/storage/3YCAY6IT/Weichselbaum et al. - Android Malware Under The Magnifying Glass.pdf:application/pdf}
}

@inproceedings{felt_android_2011,
	title = {Android permissions demystified},
	isbn = {978-1-4503-0948-6},
	url = {http://dl.acm.org/citation.cfm?doid=2046707.2046779},
	doi = {10.1145/2046707.2046779},
	abstract = {Android provides third-party applications with an extensive API that includes access to phone hardware, settings, and user data. Access to privacy- and security-relevant parts of the API is controlled with an install-time application permission system. We study Android applications to determine whether Android developers follow least privilege with their permission requests. We built Stowaway, a tool that detects overprivilege in compiled Android applications. Stowaway determines the set of API calls that an application uses and then maps those API calls to permissions. We used automated testing tools on the Android API in order to build the permission map that is necessary for detecting overprivilege. We apply Stowaway to a set of 940 applications and find that about one-third are overprivileged. We investigate the causes of overprivilege and find evidence that developers are trying to follow least privilege but sometimes fail due to insufficient API documentation.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 18th {ACM} {Conference} on {Computer} and {Communications} {Security} ({CCS} '11)},
	publisher = {ACM Press},
	author = {Felt, Adrienne Porter and Chin, Erika and Hanna, Steve and Song, Dawn and Wagner, David},
	year = {2011},
	keywords = {permissions},
	pages = {627--638},
	file = {Felt et al. - 2011 - Android permissions demystified.pdf:/home/fmind/Documents/Zotero/storage/EB5XRU46/Felt et al. - 2011 - Android permissions demystified.pdf:application/pdf}
}

@inproceedings{sarma_android_2012,
	title = {Android permissions: a perspective combining risks and benefits},
	isbn = {978-1-4503-1295-0},
	shorttitle = {Android permissions},
	url = {http://dl.acm.org/citation.cfm?doid=2295136.2295141},
	doi = {10.1145/2295136.2295141},
	abstract = {The phenomenal growth of the Android platform in the past few years has made it a lucrative target of malicious application (app) developers. There are numerous instances of malware apps that send premium rate SMS messages, track users’ private data, or apps that, even if not characterized as malware, conduct questionable actions affecting the user’s privacy or costing them money. In this paper, we investigate the feasibility of using both the permissions an app requests, the category of the app, and what permissions are requested by other apps in the same category to better inform users whether the risks of installing an app is commensurate with its expected benefit. Existing approaches consider only the risks of the permissions requested by an app and ignore both the benefits and what permissions are requested by other apps, thus having a limited effect. We propose several risk signals that and evaluate them using two datasets, one consists of 158,062 Android apps from the Android Market, and another consists of 121 malicious apps. We demonstrate the effectiveness of our proposal through extensive data analysis.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 17th {ACM} {Symposium} on {Access} {Control} {Models} and {Technologies} ({SACMAT} '12)},
	publisher = {ACM Press},
	author = {Sarma, Bhaskar Pratim and Li, Ninghui and Gates, Chris and Potharaju, Rahul and Nita-Rotaru, Cristina and Molloy, Ian},
	year = {2012},
	keywords = {permissions},
	pages = {13--22},
	file = {Sarma et al. - 2012 - Android permissions a perspective combining risks.pdf:/home/fmind/Documents/Zotero/storage/H9ZI7H7H/Sarma et al. - 2012 - Android permissions a perspective combining risks.pdf:application/pdf}
}

@incollection{hutchison_andarwin:_2013,
	address = {Berlin, Heidelberg},
	title = {{AnDarwin}: {Scalable} {Detection} of {Semantically} {Similar} {Android} {Applications}},
	series = {Lecture {Notes} in {Computer} {Science}},
	volume = {8134},
	isbn = {978-3-642-40202-9 978-3-642-40203-6},
	shorttitle = {{AnDarwin}},
	url = {http://link.springer.com/10.1007/978-3-642-40203-6_11},
	abstract = {The popularity and utility of smartphones rely on their vibrant application markets; however, plagiarism threatens the long-term health of these markets. We present a scalable approach to detecting similar Android apps based on their semantic information. We implement our approach in a tool called AnDarwin and evaluate it on 265,359 apps collected from 17 markets including Google Play and numerous third-party markets. In contrast to earlier approaches, AnDarwin has four advantages: it avoids comparing apps pairwise, thus greatly improving its scalability; it analyzes only the app code and does not rely on other information — such as the app’s market, signature, or description — thus greatly increasing its reliability; it can detect both full and partial app similarity; and it can automatically detect library code and remove it from the similarity analysis. We present two use cases for AnDarwin: finding similar apps by different developers (“clones”) and similar apps from the same developer (“rebranded”). In ten hours, AnDarwin detected at least 4,295 apps that have been the victims of cloning and 36,106 apps that are rebranded. By analyzing the clusters found by AnDarwin, we found 88 new variants of malware and identified 169 malicious apps based on differences in the requested permissions. Our evaluation demonstrates AnDarwin’s ability to accurately detect similar apps on a large scale.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Computer {Security} – {ESORICS} 2013},
	publisher = {Springer Berlin Heidelberg},
	author = {Crussell, Jonathan and Gibler, Clint and Chen, Hao},
	editor = {Crampton, Jason and Jajodia, Sushil and Mayes, Keith},
	year = {2013},
	doi = {10.1007/978-3-642-40203-6_11},
	keywords = {detection, piggybacking, vt-features},
	pages = {182--199},
	file = {Crussell et al. - 2013 - AnDarwin Scalable Detection of Semantically Simil.pdf:/home/fmind/Documents/Zotero/storage/SVJPP8X7/Crussell et al. - 2013 - AnDarwin Scalable Detection of Semantically Simil.pdf:application/pdf}
}

@inproceedings{li_investigation_2016,
	title = {An {Investigation} into the {Use} of {Common} {Libraries} in {Android} {Apps}},
	isbn = {978-1-5090-1855-0},
	url = {http://ieeexplore.ieee.org/document/7476661/},
	doi = {10.1109/SANER.2016.52},
	abstract = {The packaging model of Android apps requires the entire code necessary for the execution of an app to be shipped into one single apk file. Thus, an analysis of Android apps often visits code which is not part of the functionality delivered by the app. Such code is often contributed by the common libraries which are used pervasively by all apps. Unfortunately, Android analyses, e.g., for piggybacking detection and malware detection, can produce inaccurate results if they do not take into account the case of library code, which constitute noise in app features. Despite some efforts on investigating Android libraries, the momentum of Android research has not yet produced a complete set of common libraries to further support in-depth analysis of Android apps. In this paper, we leverage a dataset of about 1.5 million apps from Google Play to harvest potential common libraries, including advertisement libraries. With several steps of refinements, we finally collect by far the largest set of 1,113 libraries supporting common functionalities and 240 libraries for advertisement. We use the dataset to investigates several aspects of Android libraries, including their popularity and their proportion in Android app code. Based on these datasets, we have further performed several empirical investigations to confirm the motivations behind our work.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {2016 {IEEE} 23rd {International} {Conference} on {Software} {Analysis}, {Evolution}, and {Reengineering} ({SANER})},
	publisher = {IEEE},
	author = {Li, Li and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves},
	month = mar,
	year = {2016},
	keywords = {study, piggybacking},
	pages = {403--414},
	file = {Li et al. - 2016 - An Investigation into the Use of Common Libraries .pdf:/home/fmind/Documents/Zotero/storage/E786Y4NP/Li et al. - 2016 - An Investigation into the Use of Common Libraries .pdf:application/pdf}
}

@incollection{hutchison_adam:_2013,
	address = {Berlin, Heidelberg},
	title = {{ADAM}: {An} {Automatic} and {Extensible} {Platform} to {Stress} {Test} {Android} {Anti}-virus {Systems}},
	series = {Lecture {Notes} in {Computer} {Science}},
	volume = {7591},
	isbn = {978-3-642-37299-5 978-3-642-37300-8},
	shorttitle = {{ADAM}},
	url = {http://link.springer.com/10.1007/978-3-642-37300-8_5},
	abstract = {With the rising threat of smartphone malware, both academic community and commercial anti-virus companies proposed many methodologies and products to defend against smartphone malware. Thus, how to assess the effectiveness of these defense mechanisms against existing and unknown malware becomes important. We propose ADAM, an automated and extensible system that can evaluate, via large-scale stress tests, the effectiveness of anti-virus systems against a variety of malware samples for the Android platform. Specifically, ADAM can automatically transform an original malware sample to different variants via repackaging and obfuscation techniques in order to evaluate the robustness of different anti-virus systems against malware mutation. The transformation and evaluation processes of ADAM are fully automatic, generic, and extensible for different types of malware, anti-virus systems, and malware transformation techniques. We demonstrate the efficacy of ADAM using 222 Android malware samples that we collected in the wild. Using ADAM, we generate different variants based on our collected malware samples, and evaluate the detection of these variants against commercial anti-virus systems.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Detection of {Intrusions} and {Malware}, and {Vulnerability} {Assessment}},
	publisher = {Springer Berlin Heidelberg},
	author = {Zheng, Min and Lee, Patrick P. C. and Lui, John C. S.},
	editor = {Flegel, Ulrich and Markatos, Evangelos and Robertson, William},
	year = {2013},
	doi = {10.1007/978-3-642-37300-8_5},
	keywords = {obfuscation},
	pages = {82--101},
	file = {Zheng et al. - 2013 - ADAM An Automatic and Extensible Platform to Stre.pdf:/home/fmind/Documents/Zotero/storage/NPTHSFBR/Zheng et al. - 2013 - ADAM An Automatic and Extensible Platform to Stre.pdf:application/pdf}
}

@inproceedings{reina_system_2013,
	address = {Prague, Czech Republic},
	title = {A {System} {Call}-{Centric} {Analysis} and {Stimulation} {Technique} to {Automatically} {Reconstruct} {Android} {Malware} {Behaviors}},
	abstract = {With more than 500 million of activations reported in Q3 2012, Android mobile devices are becoming ubiquitous and trends confirm this is unlikely to slow down. App stores, such as Google Play, drive the entire economy of mobile applications. Unfortunately, high turnovers and access to sensitive data have soon attracted the interests of cybercriminals too with malware now hitting Android devices at an alarmingly rising pace. In this paper we present CopperDroid, an approach built on top of QEMU to automatically perform out-of-the-box dynamic behavioral analysis of Android malware. To this end, CopperDroid presents a unified analysis to characterize low-level OS-specific and high-level Android-specific behaviors. Based on the observation that such behaviors are however achieved through the invocation of system calls, CopperDroid’s VM-based dynamic system call-centric analysis is able to faithfully describe the behavior of Android malware whether it is initiated from Java, JNI or native code execution.},
	language = {en},
	booktitle = {Proceedings of the 6th {European} {Workshop} on {System} {Security} ({EuroSec})},
	author = {Reina, Alessandro and Fattori, Aristide and Cavallaro, Lorenzo},
	month = apr,
	year = {2013},
	keywords = {dynamic, characterization},
	pagetotal = {6},
	file = {Reina et al. - A System Call-Centric Analysis and Stimulation Tec.pdf:/home/fmind/Documents/Zotero/storage/3PRGLIMW/Reina et al. - A System Call-Centric Analysis and Stimulation Tec.pdf:application/pdf}
}

@inproceedings{felt_survey_2011,
	title = {A survey of mobile malware in the wild},
	isbn = {978-1-4503-1000-0},
	url = {http://dl.acm.org/citation.cfm?doid=2046614.2046618},
	doi = {10.1145/2046614.2046618},
	abstract = {Mobile malware is rapidly becoming a serious threat. In this paper, we survey the current state of mobile malware in the wild. We analyze the incentives behind 46 pieces of iOS, Android, and Symbian malware that spread in the wild from 2009 to 2011. We also use this data set to evaluate the effectiveness of techniques for preventing and identifying mobile malware. After observing that 4 pieces of malware use root exploits to mount sophisticated attacks on Android phones, we also examine the incentives that cause non-malicious smartphone tinkerers to publish root exploits and survey the availability of root exploits.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 1st {ACM} {Workshop} on {Security} and {Privacy} in {Smartphones} and {Mobile} {Devices} ({SPSM} '11)},
	publisher = {ACM Press},
	author = {Felt, Adrienne Porter and Finifter, Matthew and Chin, Erika and Hanna, Steve and Wagner, David},
	year = {2011},
	keywords = {survey, reverse},
	pages = {3--14},
	file = {Felt et al. - 2011 - A survey of mobile malware in the wild.pdf:/home/fmind/Documents/Zotero/storage/6XZ9T72F/Felt et al. - 2011 - A survey of mobile malware in the wild.pdf:application/pdf}
}

@incollection{satapathy_survey_2016,
	address = {Singapore},
	title = {A {Survey} of {Android} {Malware} {Detection} {Strategy} and {Techniques}},
	series = {Advances in {Intelligent} {Systems} and {Computing}},
	volume = {409},
	isbn = {978-981-10-0133-8 978-981-10-0135-2},
	url = {http://link.springer.com/10.1007/978-981-10-0135-2_4},
	abstract = {The expeditious growth of Android malwares has posed a serious challenge in front of researchers. The researchers are continuously proposing countermeasures and developing tools to mitigate against such attacks. In this paper, widely used techniques that have been proposed recently by researchers have been explored. The key contributions of each of these techniques along with their limitations have been analyzed. All these techniques were compared based on nine parameters and it was identified that Mobile Sandbox tool is the best when time factor is not considered because it possess the capability of both a static and dynamic analysis, native API call tracking and web accessibility. If time factor is considered, then Dendroid performs best among all. This is due to the reason that it applies text mining to get the signature of malware and it can also classify unknown malware sample through 1-NN classifier.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of {International} {Conference} on {ICT} for {Sustainable} {Development}},
	publisher = {Springer Singapore},
	author = {Sharma, Mohit and Chawla, Meenu and Gajrani, Jyoti},
	editor = {Satapathy, Suresh Chandra and Joshi, Amit and Modi, Nilesh and Pathak, Nisarg},
	year = {2016},
	doi = {10.1007/978-981-10-0135-2_4},
	keywords = {detection, survey},
	pages = {39--51},
	file = {Sharma et al. - 2016 - A Survey of Android Malware Detection Strategy and.pdf:/home/fmind/Documents/Zotero/storage/KZB3TNTD/Sharma et al. - 2016 - A Survey of Android Malware Detection Strategy and.pdf:application/pdf}
}

@inproceedings{enck_study_2011,
	address = {San Francisco, CA},
	title = {A {Study} of {Android} {Application} {Security}},
	language = {en},
	booktitle = {20th {USENIX} {Security} {Symposium} ({USENIX} {Security} 11)},
	publisher = {USENIX Association},
	author = {Enck, William and Octeau, Damien and McDaniel, Patrick and Chaudhuri, Swarat},
	month = aug,
	year = {2011},
	pagetotal = {38},
	file = {Enck et al. - A Study of Android Application Security.pdf:/home/fmind/Documents/Zotero/storage/A4M93C2L/Enck et al. - A Study of Android Application Security.pdf:application/pdf}
}

@inproceedings{barrera_methodology_2010,
	title = {A methodology for empirical analysis of permission-based security models and its application to {Android}},
	isbn = {978-1-4503-0245-6},
	url = {http://portal.acm.org/citation.cfm?doid=1866307.1866317},
	doi = {10.1145/1866307.1866317},
	abstract = {Permission-based security models provide controlled access to various system resources. The expressiveness of the permission set plays an important role in providing the right level of granularity in access control. In this work, we present a methodology for the empirical analysis of permission-based security models which makes novel use of the Self-Organizing Map (SOM) algorithm of Kohonen (2001). While the proposed methodology may be applicable to a wide range of architectures, we analyze 1,100 Android applications as a case study. Our methodology is of independent interest for visualization of permission-based systems beyond our present Android-specific empirical analysis. We offer some discussion identifying potential points of improvement for the Android permission model, attempting to increase expressiveness where needed without increasing the total number of permissions or overall complexity.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {Proceedings of the 17th {ACM} {Conference} on {Computer} and {Communications} {Security} ({CCS} '10)},
	publisher = {ACM Press},
	author = {Barrera, David and Kayacik, H. Güneş and van Oorschot, Paul C. and Somayaji, Anil},
	year = {2010},
	keywords = {study, permissions},
	pages = {73--84},
	file = {Barrera et al. - 2010 - A methodology for empirical analysis of permission.pdf:/home/fmind/Documents/Zotero/storage/ENUTEYMC/Barrera et al. - 2010 - A methodology for empirical analysis of permission.pdf:application/pdf}
}

@inproceedings{viennot_measurement_2014,
	title = {A measurement study of {Google} {Play}},
	isbn = {978-1-4503-2789-3},
	url = {http://dl.acm.org/citation.cfm?doid=2591971.2592003},
	doi = {10.1145/2591971.2592003},
	abstract = {Although millions of users download and use third-party Android applications from the Google Play store, little information is known on an aggregated level about these applications. We have built PlayDrone, the first scalable Google Play store crawler, and used it to index and analyze over 1,100,000 applications in the Google Play store on a daily basis, the largest such index of Android applications. PlayDrone leverages various hacking techniques to circumvent Google’s roadblocks for indexing Google Play store content, and makes proprietary application sources available, including source code for over 880,000 free applications. We demonstrate the usefulness of PlayDrone in decompiling and analyzing application content by exploring four previously unaddressed issues: the characterization of Google Play application content at large scale and its evolution over time, library usage in applications and its impact on application portability, duplicative application content in Google Play, and the ineffectiveness of OAuth and related service authentication mechanisms resulting in malicious users being able to easily gain unauthorized access to user data and resources on Amazon Web Services and Facebook.},
	language = {en},
	urldate = {2018-04-10},
	booktitle = {The 2014 {ACM} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems} ({SIGMETRICS} '14)},
	publisher = {ACM Press},
	author = {Viennot, Nicolas and Garcia, Edward and Nieh, Jason},
	year = {2014},
	keywords = {study},
	pages = {221--233},
	file = {Viennot et al. - 2014 - A measurement study of google play.pdf:/home/fmind/Documents/Zotero/storage/9JQAAGCA/Viennot et al. - 2014 - A measurement study of google play.pdf:application/pdf}
}

@misc{wermke_large_2018,
	title = {A {Large} {Scale} {Investigation} of {Obfuscation} {Use} in {Google} {Play}},
	abstract = {Android applications are frequently plagiarized or repackaged, and software obfuscation is a recommended protection against these practices. However, there is very little data on the overall rates of app obfuscation, the techniques used, or factors that lead to developers to choose to obfuscate their apps. In this paper, we present the first comprehensive analysis of the use of and challenges to software obfuscation in Android applications. We analyzed 1.7 million free Android apps from Google Play to detect various obfuscation techniques, finding that only 24.92\% of apps are obfuscated by the developer. To better understand this rate of obfuscation, we surveyed 308 Google Play developers about their experiences and attitudes about obfuscation. We found that while developers feel that apps in general are at risk of plagiarism, they do not fear theft of their own apps. Developers also self-report difficulties applying obfuscation for their own apps. To better understand this, we conducted a follow-up study where the vast majority of 70 participants failed to obfuscate a realistic sample app even while many mistakenly believed they had been successful. Our findings show that more work is needed to make obfuscation tools more usable, to educate developers on the risk of their apps being reverse engineered, their intellectual property stolen, their apps being repackaged and redistributed as malware and to improve the health of the overall Android ecosystem.},
	language = {en},
	author = {Wermke, Dominik and Huaman, Nicolas and Acar, Yasemin and Reaves, Brad and Traynor, Patrick and Fahl, Sascha},
	month = jan,
	year = {2018},
	keywords = {study, obfuscation},
	pages = {14},
	file = {Wermke et al. - A Large Scale Investigation of Obfuscation Use in .pdf:/home/fmind/Documents/Zotero/storage/4BXLXKKW/Wermke et al. - A Large Scale Investigation of Obfuscation Use in .pdf:application/pdf}
}

@inproceedings{harley_game_2009,
	title = {The {Game} of the {Name}: {Malware} {Naming}, {Shape} {Shifters} and {Sympathetic} {Magic}},
	booktitle = {{CEET} 3rd {Intl}. {Conf}. on {Cybercrime} {Forensics} {Education} \& {Training}, {San} {Diego}, {CA}},
	author = {Harley, David},
	year = {2009},
	keywords = {vt-labels},
	file = {Harley - 2009 - The Game of the Name Malware Naming, Shape Shifter.pdf:/home/fmind/Documents/Zotero/storage/2DZVCICC/Harley - 2009 - The Game of the Name Malware Naming, Shape Shifter.pdf:application/pdf}
}

@inproceedings{jordaney_transcend:_2017,
	author = {Jordaney, Roberto and Sharad, Kumar and Dash, Santanu K. and Wang, Zhi and Papini, Davide and Nouretdinov, Ilia and Cavallaro, Lorenzo},
	title = {Transcend: {Detecting} {Concept} {Drift} in {Malware} {Classification} {Models}},
	booktitle = {26th {USENIX} {Security} {Symposium} ({USENIX} {Security} 17)},
	year = {2017},
	pages = {625--642},
	publisher = {USENIX Association},
	address = {Vancouver, BC},
	isbn = {978-1-931971-40-9},
	url = {https://www.usenix.org/conference/usenixsecurity17/technical-sessions/presentation/jordaney},
	keywords = {classification},
	file = {USENIX Association - Proceedings of the Second Workshop on Real, Large .pdf:/home/fmind/Documents/Zotero/storage/MNBCTJS6/USENIX Association - Proceedings of the Second Workshop on Real, Large .pdf:application/pdf}
}

@article{papernot_practical_2016,
	title = {Practical {Black}-{Box} {Attacks} against {Deep} {Learning} {Systems} using {Adversarial} {Examples}},
	volume = {abs/1602.02697},
	url = {http://arxiv.org/abs/1602.02697},
	eprint = {1602.02697},
	eprinttype = {arXiv},
	journal = {CoRR},
	author = {Papernot, Nicolas and McDaniel, Patrick D. and Goodfellow, Ian J. and Jha, Somesh and Celik, Z. Berkay and Swami, Ananthram},
	year = {2016},
	file = {Practical Black-Box Attacks against Deep Learning Systems using Adversarial Examples.pdf:/home/fmind/Documents/Zotero/storage/HAPGV7ZL/Practical Black-Box Attacks against Deep Learning Systems using Adversarial Examples.pdf:application/pdf}
}

@inproceedings{christodorescu_mining_2007,
	address = {New York, NY, USA},
	series = {{ESEC}-{FSE} '07},
	title = {Mining {Specifications} of {Malicious} {Behavior}},
	isbn = {978-1-59593-811-4},
	url = {http://doi.acm.org/10.1145/1287624.1287628},
	doi = {10.1145/1287624.1287628},
	booktitle = {Proceedings of the 6th {Joint} {Meeting} of the {European} {Software} {Engineering} {Conference} and the {ACM} {SIGSOFT} {Symposium} on {The} {Foundations} of {Software} {Engineering}},
	publisher = {ACM},
	author = {Christodorescu, Mihai and Jha, Somesh and Kruegel, Christopher},
	year = {2007},
	keywords = {detection, characterization, vt-features},
	pages = {5--14},
	file = {Mining Specifications of Malicious Behavior.pdf:/home/fmind/Documents/Zotero/storage/RRQKM2X2/Mining Specifications of Malicious Behavior.pdf:application/pdf}
}

@inproceedings{xue_malton:_2017,
	author = {Xue, Lei and Zhou, Yajin and Chen, Ting and Luo, Xiapu and Gu, Guofei},
	title = {Malton: {Towards} {On}-{Device} {Non}-{Invasive} {Mobile} {Malware} {Analysis} for {ART}},
	booktitle = {26th {USENIX} {Security} {Symposium} ({USENIX} {Security} 17)},
	year = {2017},
	pages = {289--306},
	publisher = {USENIX Association},
	address = {Vancouver, BC},
	isbn = {978-1-931971-40-9},
	url = {https://www.usenix.org/conference/usenixsecurity17/technical-sessions/presentation/xue},
	keywords = {dynamic, characterization},
	file = {Malton Towards On-Device Non-Invasive Mobile Malware Analysis for ART.pdf:/home/fmind/Documents/Zotero/storage/8W5ZLEIE/Malton Towards On-Device Non-Invasive Mobile Malware Analysis for ART.pdf:application/pdf}
}

@inproceedings{allix_androzoo:_2016,
	title = {{AndroZoo}: {Collecting} {Millions} of {Android} {Apps} for the {Research} {Community}},
	doi = {10.1109/MSR.2016.056},
	abstract = {We present a growing collection of Android Applications collected from several sources, including the official Google Play app market. Our dataset, AndroZoo, currently contains more than three million apps, each of which has been analysed by tens of different AntiVirus products to know which applications are detected as Malware. We provide this dataset to contribute to ongoing research efforts, as well as to enable new potential research topics on Android Apps. By releasing our dataset to the research community, we also aim at encouraging our fellow researchers to engage in reproducible experiments.},
	booktitle = {2016 {IEEE}/{ACM} 13th {Working} {Conference} on {Mining} {Software} {Repositories} ({MSR})},
	author = {Allix, K. and Bissyandé, T. F. and Klein, J. and Le Traon, Y.},
	month = may,
	year = {2016},
	pages = {468--471},
	file = {Allix et al. - 2016 - AndroZoo Collecting Millions of Android Apps for .pdf:/home/fmind/Documents/Zotero/storage/JVYGVWMH/Allix et al. - 2016 - AndroZoo Collecting Millions of Android Apps for .pdf:application/pdf}
}

@inproceedings{tam_copperdroid:_2015,
	title = {{CopperDroid}: {Automatic} {Reconstruction} of {Android} {Malware} {Behaviors}},
	isbn = {978-1-891562-38-9},
	shorttitle = {{CopperDroid}},
	url = {https://www.ndss-symposium.org/ndss2015/ndss-2015-programme/copperdroid-automatic-reconstruction-android-malware-behaviors/},
	doi = {10.14722/ndss.2015.23145},
	abstract = {Mobile devices and their application marketplaces drive the entire economy of the today’s mobile landscape. Android platforms alone have produced staggering revenues, exceeding five billion USD, which has attracted cybercriminals and increased malware in Android markets at an alarming rate. To better understand this slew of threats, we present CopperDroid, an automatic VMI-based dynamic analysis system to reconstruct the behaviors of Android malware. The novelty of CopperDroid lies in its agnostic approach to identify interesting OS- and high-level Android-specific behaviors. It reconstructs these behaviors by observing and dissecting system calls and, therefore, is resistant to the multitude of alterations the Android runtime is subjected to over its life-cycle. CopperDroid automatically and accurately reconstructs events of interest that describe, not only well-known process-OS interactions (e.g., file and process creation), but also complex intra- and inter-process communications (e.g., SMS reception), whose semantics are typically contextualized through complex Android objects. Because CopperDroid’s reconstruction mechanisms are agnostic to the underlying action invocation methods, it is able to capture actions initiated both from Java and native code execution. CopperDroid’s analysis generates detailed behavioral profiles that abstract a large stream of low-level—often uninteresting—events into concise, high-level semantics, which are well-suited to provide insightful behavioral traits and open the possibility to further research directions. We carried out an extensive evaluation to assess the capabilities and performance of CopperDroid on more than 2,900 Android malware samples. Our experiments show that CopperDroid faithfully reconstructs OS- and Android-specific behaviors. Additionally, we demonstrate how CopperDroid can be leveraged to disclose additional behaviors through the use of a simple, yet effective, app stimulation technique. Using this technique, we successfully triggered and disclosed additional behaviors on more than 60\% of the analyzed malware samples. This qualitatively demonstrates the versatility of CopperDroid’s ability to improve dynamic-based code coverage.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {Proceedings of the 2015 {Network} and {Distributed} {System} {Security} {Symposium} ({NDSS})},
	publisher = {Internet Society},
	author = {Tam, Kimberly and Khan, Salahuddin J. and Fattori, Aristide and Cavallaro, Lorenzo},
	year = {2015},
	keywords = {classification, dynamic},
	file = {Tam et al. - 2015 - CopperDroid Automatic Reconstruction of Android M.pdf:/home/fmind/Documents/Zotero/storage/Q2N8FKVZ/Tam et al. - 2015 - CopperDroid Automatic Reconstruction of Android M.pdf:application/pdf}
}

@inproceedings{feng_apposcopy:_2014,
	title = {Apposcopy: semantics-based detection of {Android} malware through static analysis},
	isbn = {978-1-4503-3056-5},
	shorttitle = {Apposcopy},
	url = {http://dl.acm.org/citation.cfm?doid=2635868.2635869},
	doi = {10.1145/2635868.2635869},
	abstract = {We present Apposcopy, a new semantics-based approach for identifying a prevalent class of Android malware that steals private user information. Apposcopy incorporates (i) a high-level language for specifying signatures that describe semantic characteristics of malware families and (ii) a static analysis for deciding if a given application matches a malware signature. The signature matching algorithm of Apposcopy uses a combination of static taint analysis and a new form of program representation called Inter-Component Call Graph to efficiently detect Android applications that have certain control- and data-flow properties. We have evaluated Apposcopy on a corpus of real-world Android applications and show that it can effectively and reliably pinpoint malicious applications that belong to certain malware families.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {Proceedings of the 22nd {ACM} {SIGSOFT} {International} {Symposium} on {Foundations} of {Software} {Engineering} ({FSE} 2014)},
	publisher = {ACM Press},
	author = {Feng, Yu and Anand, Saswat and Dillig, Isil and Aiken, Alex},
	year = {2014},
	keywords = {detection, static},
	pages = {576--587},
	file = {Feng et al. - 2014 - Apposcopy semantics-based detection of Android ma.pdf:/home/fmind/Documents/Zotero/storage/MTICVIU7/Feng et al. - 2014 - Apposcopy semantics-based detection of Android ma.pdf:application/pdf}
}

@inproceedings{hurier_euphony:_2017,
	title = {Euphony: {Harmonious} {Unification} of {Cacophonous} {Anti}-{Virus} {Vendor} {Labels} for {Android} {Malware}},
	copyright = {All rights reserved},
	isbn = {978-1-5386-1544-7},
	shorttitle = {Euphony},
	url = {http://ieeexplore.ieee.org/document/7962391/},
	doi = {10.1109/MSR.2017.57},
	abstract = {Android malware is now pervasive and evolving rapidly. Thousands of malware samples are discovered every day with new models of attacks. The growth of these threats has come hand in hand with the proliferation of collective repositories sharing the latest specimens. Having access to a large number of samples opens new research directions aiming at efficiently vetting apps. However, automatically inferring a reference dataset from those repositories is not straightforward and can inadvertently lead to unforeseen misconceptions. On the one hand, samples are often mis-labeled as different parties use distinct naming schemes for the same sample. On the other hand, samples are frequently mis-classified due to conceptual errors made during labeling processes. In this paper, we mine AntiVirus labels and analyze the associations between all labels given by different vendors to systematically unify common samples into family groups. The key novelty of our approach, named EUPHONY [20], is that no a-priori knowledge on malware families is needed. We evaluate EUPHONY using reference datasets and more than 400 thousands additional samples outside of these datasets. Results show that EUPHONY can accurately label malware with a fine-grained clustering of families, while providing competitive performance against the state-of-the-art.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {2017 {IEEE}/{ACM} 14th {International} {Conference} on {Mining} {Software} {Repositories} ({MSR})},
	publisher = {IEEE},
	author = {Hurier, Mederic and Suarez-Tangil, Guillermo and Dash, Santanu Kumar and Bissyandé, Tegawendé F. and Le Traon, Yves and Klein, Jacques and Cavallaro, Lorenzo},
	month = may,
	year = {2017},
	keywords = {vt-labels},
	pages = {425--435},
	file = {Hurier et al. - 2017 - Euphony Harmonious Unification of Cacophonous Ant.pdf:/home/fmind/Documents/Zotero/storage/IHSU22KI/Hurier et al. - 2017 - Euphony Harmonious Unification of Cacophonous Ant.pdf:application/pdf}
}

@misc{ramirez_koodous_nodate,
	author = {Ramírez, Fernando and López, Francisco and Vaca, Daniel and Sánchez, Antonio},
	title = {Koodous},
	journal = {About Koodous},
	url = {https://koodous.com/about},
	urldate = {2018-04-01},
	note = {https://koodous.com/about},
	language = {en},
	abstract = {Koodous is a collaborative platform that combines the power of online analysis tools with social interactions between the analysts over a vast APKs repository.}
}

@misc{plohmann_malpedia_nodate,
	author = {Plohmann, Daniel},
	title = {Malpedia},
	journal = {Malpedia},
	url = {https://malpedia.caad.fkie.fraunhofer.de/},
	urldate = {2018-04-01},
	note = {https://malpedia.caad.fkie.fraunhofer.de/},
	language = {en},
	keywords = {vt-features},
	abstract = {The primary goal of Malpedia is to provide a resource for rapid identification and actionable context when investigating malware. Openness to curated contributions shall ensure an accountable level of quality in order to foster meaningful and reproducible research.}
}

@misc{parkour_contagio_nodate,
	author = {Parkour, Mila},
	title = {Contagio {MiniDump}},
	journal = {About Contagio MiniDump},
	url = {https://contagiominidump.blogspot.lu/},
	urldate = {2018-04-01},
	note = {https://contagiominidump.blogspot.lu/},
	language = {en},
	abstract = {Contagio mobile mini-dump is a part of contagiodump.blogspot.com. Contagio mobile mini-dump offers an upload dropbox for you to share your mobile malware samples.}
}

@misc{allix_androzoo_nodate,
	title = {{AndroZoo}},
	url = {https://androzoo.uni.lu/},
	abstract = {AndroZoo is a growing collection of Android Applications collected from several sources, including the official Google Play app market. It currently contains 5,817,629 different APKs, each of which has been (or will soon be) analysed by tens of different AntiVirus products to know which applications are detected as Malware. We provide this dataset to contribute to ongoing research efforts, as well as to enable new potential research topics on Android Apps. By releasing our dataset to the research community, we also aim at encouraging our fellow researchers to engage in reproducible experiments.},
	language = {en},
	urldate = {2018-04-01},
	journal = {AndroZoo},
	author = {Allix, Kevin},
	note = {https://androzoo.uni.lu/}
}

@misc{desnos_androguard_nodate,
	author = {Desnos, Anthony and Gueguen, Geoffroy},
	title = {Androguard},
	type = {Source {Code}},
	journal = {Androguard Github Repository},
	url = {https://github.com/androguard/androguard},
	urldate = {2018-04-01},
	note = {https://github.com/androguard/androguard},
	language = {en},
	abstract = {Reverse engineering, Malware and goodware analysis of Android applications ... and more (ninja !)}
}

@misc{skulason_caro_nodate,
	author = {Skulason, Fridrik and Solomon, Alan and Bontchev, Vesselin},
	title = {{CARO}},
	journal = {A New Virus Naming Convention},
	url = {http://www.caro.org/articles/naming.html},
	urldate = {2018-04-01},
	note = {http://www.caro.org/articles/naming.html},
	language = {en},
	keywords = {vt-labels}
}

@misc{noauthor_virustotal_nodate,
	key = {VirusTotal},
	title = {{VirusTotal}},
	url = {https://www.virustotal.com/about/},
	abstract = {VirusTotal, a subsidiary of Google, is a free online service that analyzes files and URLs enabling the identification of viruses, worms, trojans and other kinds of malicious content detected by antivirus engines and website scanners. At the same time, it may be used as a means to detect false positives, i.e. innocuous resources detected as malicious by one or more scanners. VirusTotal’s mission is to help in improving the antivirus and security industry and make the internet a safer place through the development of free tools and services.},
	language = {en},
	urldate = {2018-04-01},
	journal = {About VirusTotal},
	note = {https://www.virustotal.com/about/}
}

@misc{suarez-tangil_eight_2018,
	title = {Eight {Years} of {Rider} {Measurement} in the {Android} {Malware} {Ecosystem}: {Evolution} and {Lessons} {Learned}},
	abstract = {Despite the growing threat posed by Android malware, the research community is still lacking a comprehensive view of common behaviors and trends exposed by malware families active on the platform. Without such view, the researchers incur the risk of developing systems that only detect outdated threats, missing the most recent ones. In this paper, we conduct the largest measurement of Android malware behavior to date, analyzing over 1.2 million malware samples that belong to 1.2K families over a period of eight years (from 2010 to 2017). We aim at understanding how the behavior of Android malware has evolved over time, focusing on repackaging malware. In this type of threats different innocuous apps are piggybacked with a malicious payload (rider), allowing inexpensive malware manufacturing.},
	language = {en},
	author = {Suarez-Tangil, Guillermo and Stringhini, Gianluca},
	month = jan,
	year = {2018},
	eprint = {1801.08115},
	eprinttype = {arXiv},
	eprintclass = {cs.CR},
	keywords = {study, piggybacking, vt-features, evolution},
	pagetotal = {18},
	file = {Suarez-Tangil and Stringhini - Eight Years of Rider Measurement in the Android Ma.pdf:/home/fmind/Documents/Zotero/storage/CLWZZYTZ/Suarez-Tangil and Stringhini - Eight Years of Rider Measurement in the Android Ma.pdf:application/pdf}
}

@article{li_understanding_2017,
	title = {Understanding {Android} {App} {Piggybacking}: {A} {Systematic} {Study} of {Malicious} {Code} {Grafting}},
	volume = {12},
	issn = {1556-6013, 1556-6021},
	shorttitle = {Understanding {Android} {App} {Piggybacking}},
	url = {http://ieeexplore.ieee.org/document/7828100/},
	doi = {10.1109/TIFS.2017.2656460},
	abstract = {The Android packaging model offers ample opportunities for malware writers to piggyback malicious code in popular apps, which can then be easily spread to a large user base. Although recent research has produced approaches and tools to identify piggybacked apps, the literature lacks a comprehensive investigation into such phenomenon. We fill this gap by: 1) systematically building a large set of piggybacked and benign apps pairs, which we release to the community; 2) empirically studying the characteristics of malicious piggybacked apps in comparison with their benign counterparts; and 3) providing insights on piggybacking processes. Among several findings providing insights analysis techniques should build upon to improve the overall detection and classification accuracy of piggybacked apps, we show that piggybacking operations not only concern app code, but also extensively manipulates app resource files, largely contradicting common beliefs. We also find that piggybacking is done with little sophistication, in many cases automatically, and often via library code.},
	language = {en},
	number = {6},
	urldate = {2018-04-11},
	journal = {IEEE Transactions on Information Forensics and Security},
	author = {Li, Li and Li, Daoyuan and Bissyandé, Tegawendé F. and Klein, Jacques and Le Traon, Yves and Lo, David and Cavallaro, Lorenzo},
	month = jun,
	year = {2017},
	keywords = {static, study, piggybacking, characterization, vt-features},
	pages = {1269--1284},
	file = {Li et al. - 2017 - Understanding Android App Piggybacking A Systemat.pdf:/home/fmind/Documents/Zotero/storage/D7KYFVIL/Li et al. - 2017 - Understanding Android App Piggybacking A Systemat.pdf:application/pdf}
}

@inproceedings{chen_more_2016,
	title = {More {Semantics} {More} {Robust}: {Improving} {Android} {Malware} {Classifiers}},
	isbn = {978-1-4503-4270-4},
	shorttitle = {More {Semantics} {More} {Robust}},
	url = {http://dl.acm.org/citation.cfm?doid=2939918.2939931},
	doi = {10.1145/2939918.2939931},
	abstract = {Automatic malware classifiers often perform badly on the detection of new malware, i.e., their robustness is poor. We study the machine-learning-based mobile malware classifiers and reveal one reason: the input features used by these classifiers can’t capture general behavioural patterns of malware instances. We extract the best-performing syntax-based features like permissions and API calls, and some semantics-based features like happen-befores and unwanted behaviours, and train classifiers using popular supervised and semi-supervised learning methods. By comparing their classification performance on industrial datasets collected across several years, we demonstrate that using semantics-based features can dramatically improve robustness of malware classifiers.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {Proceedings of the 9th {ACM} {Conference} on {Security} \& {Privacy} in {Wireless} and {Mobile} {Networks} ({WiSec} '16)},
	publisher = {ACM Press},
	author = {Chen, Wei and Aspinall, David and Gordon, Andrew D. and Sutton, Charles and Muttik, Igor},
	year = {2016},
	keywords = {detection, static},
	pages = {147--158},
	file = {Chen et al. - 2016 - More Semantics More Robust Improving Android Malw.pdf:/home/fmind/Documents/Zotero/storage/AL8HSCGL/Chen et al. - 2016 - More Semantics More Robust Improving Android Malw.pdf:application/pdf}
}

@inproceedings{lindorfer_marvin:_2015,
	title = {{MARVIN}: {Efficient} and {Comprehensive} {Mobile} {App} {Classification} through {Static} and {Dynamic} {Analysis}},
	isbn = {978-1-4673-6564-2},
	shorttitle = {{MARVIN}},
	url = {http://ieeexplore.ieee.org/document/7273650/},
	doi = {10.1109/COMPSAC.2015.103},
	abstract = {Android dominates the smartphone operating system market and consequently has attracted the attention of malware authors and researchers alike. Despite the considerable number of proposed malware analysis systems, comprehensive and practical malware analysis solutions are scarce and often short-lived. Systems relying on static analysis alone struggle with increasingly popular obfuscation and dynamic code loading techniques, while purely dynamic analysis systems are prone to analysis evasion.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {2015 {IEEE} 39th {Annual} {Computer} {Software} and {Applications} {Conference} ({COMPSAC})},
	publisher = {IEEE},
	author = {Lindorfer, Martina and Neugschwandtner, Matthias and Platzer, Christian},
	month = jul,
	year = {2015},
	keywords = {detection, static, dynamic},
	pages = {422--433},
	file = {Lindorfer et al. - 2015 - MARVIN Efficient and Comprehensive Mobile App Cla.pdf:/home/fmind/Documents/Zotero/storage/JZB2HQMH/Lindorfer et al. - 2015 - MARVIN Efficient and Comprehensive Mobile App Cla.pdf:application/pdf}
}

@inproceedings{suarez-tangil_droidsieve:_2017,
	title = {{DroidSieve}: {Fast} and {Accurate} {Classification} of {Obfuscated} {Android} {Malware}},
	isbn = {978-1-4503-4523-1},
	shorttitle = {{DroidSieve}},
	url = {http://dl.acm.org/citation.cfm?doid=3029806.3029825},
	doi = {10.1145/3029806.3029825},
	abstract = {With more than two million applications, Android marketplaces require automatic and scalable methods to efficiently vet apps for the absence of malicious threats. Recent techniques have successfully relied on the extraction of lightweight syntactic features suitable for machine learning classification, but despite their promising results, the very nature of such features suggest they would unlikely—on their own—be suitable for detecting obfuscated Android malware. To address this challenge, we propose DroidSieve, an Android malware classifier based on static analysis that is fast, accurate, and resilient to obfuscation. For a given app, DroidSieve first decides whether the app is malicious and, if so, classifies it as belonging to a family of related malware. DroidSieve exploits obfuscation-invariant features and artifacts introduced by obfuscation mechanisms used in malware. At the same time, these purely static features are designed for processing at scale and can be extracted quickly. For malware detection, we achieve up to 99.82\% accuracy with zero false positives; for family identification of obfuscated malware, we achieve 99.26\% accuracy at a fraction of the computational cost of state-of-the-art techniques.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {Proceedings of the {Seventh} {ACM} {Conference} on {Data} and {Application} {Security} and {Privacy} ({CODASPY} '17)},
	publisher = {ACM Press},
	author = {Suarez-Tangil, Guillermo and Dash, Santanu Kumar and Ahmadi, Mansour and Kinder, Johannes and Giacinto, Giorgio and Cavallaro, Lorenzo},
	year = {2017},
	keywords = {detection, static},
	pages = {309--320},
	file = {Suarez-Tangil et al. - 2017 - DroidSieve Fast and Accurate Classification of Ob.pdf:/home/fmind/Documents/Zotero/storage/P768E88S/Suarez-Tangil et al. - 2017 - DroidSieve Fast and Accurate Classification of Ob.pdf:application/pdf}
}

@inproceedings{dash_droidscribe:_2016,
	title = {{DroidScribe}: {Classifying} {Android} {Malware} {Based} on {Runtime} {Behavior}},
	isbn = {978-1-5090-3690-5},
	shorttitle = {{DroidScribe}},
	url = {http://ieeexplore.ieee.org/document/7527777/},
	doi = {10.1109/SPW.2016.25},
	abstract = {The Android ecosystem has witnessed a surge in malware, which not only puts mobile devices at risk but also increases the burden on malware analysts assessing and categorizing threats. In this paper, we show how to use machine learning to automatically classify Android malware samples into families with high accuracy, while observing only their runtime behavior. We focus exclusively on dynamic analysis of runtime behavior to provide a clean point of comparison that is dual to static approaches. Specific challenges in the use of dynamic analysis on Android are the limited information gained from tracking low-level events and the imperfect coverage when testing apps, e.g., due to inactive command and control servers. We observe that on Android, pure system calls do not carry enough semantic content for classification and instead rely on lightweight virtual machine introspection to also reconstruct Android-level interprocess communication. To address the sparsity of data resulting from low coverage, we introduce a novel classification method that fuses Support Vector Machines with Conformal Prediction to generate high-accuracy prediction sets where the information is insufficient to pinpoint a single family.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {2016 {IEEE} {Security} and {Privacy} {Workshops} ({SPW})},
	publisher = {IEEE},
	author = {Dash, Santanu Kumar and Suarez-Tangil, Guillermo and Khan, Salahuddin and Tam, Kimberly and Ahmadi, Mansour and Kinder, Johannes and Cavallaro, Lorenzo},
	month = may,
	year = {2016},
	keywords = {classification, dynamic, vt-features},
	pages = {252--261},
	file = {Dash et al. - 2016 - DroidScribe Classifying Android Malware Based on .pdf:/home/fmind/Documents/Zotero/storage/RMKN2ZY6/Dash et al. - 2016 - DroidScribe Classifying Android Malware Based on .pdf:application/pdf}
}

@misc{wei_deep_2017,
	title = {Deep {Ground} {Truth} {Analysis} of {Current} {Android} {Malware}},
	abstract = {To build effective malware analysis technique and to evaluate new detection tools, up-to-date datasets reflecting the current Android malware landscape are essential. For such datasets to be maximally useful, they need to contain reliable and complete information on malware’s behaviors and techniques used in the malicious activities. Such a dataset shall also provide a comprehensive coverage of a large number of types of malware. The Android Malware Genome created circa 2011 has been the only well-labeled and widely studied dataset the research community had easy access to. But not only is it outdated and no longer represents the current Android malware landscape, it also does not provide as detailed information on malware’s behaviors as needed for research. Thus it is urgent to create a high-quality dataset for Android malware. While existing information sources such as VirusTotal are useful, to obtain the accurate and detailed information for malware behaviors, deep manual analysis is indispensable. In this work we present our approach to preparing a large Android malware dataset for the research community. We leverage existing anti-virus scan result and automation techniques in categorizing our large dataset (containing 24,650 malware app samples) into 135 varieties (based on malware behavioral semantics) that belong to 71 malware families. For each variety, we select three samples as representatives, for a total of 405 malware samples, to conduct in-depth manual analysis. Based on the manual analysis result we generate detailed descriptions of each malware variety’s behaviors and include them in our dataset. We also report our observations on the current landscape of Android malware as depicted in the dataset. Furthermore, we present detailed documentation of the process used in creating the dataset, including the guidelines for the manual analysis. We will make our Android malware dataset available to the research community.},
	language = {en},
	author = {Wei, Fengguo and Li, Yuping and Roy, Sankardas and Ou, Xinming and Zhou, Wu},
	month = jan,
	year = {2017},
	keywords = {classification, vt-features, reverse, dataset},
	pagetotal = {20},
	file = {Wei et al. - Deep Ground Truth Analysis of Current Android Malw.pdf:/home/fmind/Documents/Zotero/storage/MBNNPD4Z/Wei et al. - Deep Ground Truth Analysis of Current Android Malw.pdf:application/pdf}
}

@article{fan_dapasa:_2017,
	title = {{DAPASA}: {Detecting} {Android} {Piggybacked} {Apps} {Through} {Sensitive} {Subgraph} {Analysis}},
	volume = {12},
	issn = {1556-6013, 1556-6021},
	shorttitle = {{DAPASA}},
	url = {http://ieeexplore.ieee.org/document/7887707/},
	doi = {10.1109/TIFS.2017.2687880},
	abstract = {With the exponential growth of smartphone adoption, malware attacks on smartphones have resulted in serious threats to users, especially those on popular platforms, such as Android. Most Android malware is generated by piggybacking malicious payloads into benign applications (apps), which are called piggybacked apps. In this paper, we propose DAPASA, an approach to detect Android piggybacked apps through sensitive subgraph analysis. Two assumptions are established to reflect the different invocation patterns of sensitive APIs in the injected malicious payloads (rider) of a piggybacked app and in its host app (carrier). With these two assumptions, DAPASA generates a sensitive subgraph (SSG) to profile the most suspicious behavior of an app. Five features are constructed from SSG to depict the invocation patterns. The five features are fed into the machine learning algorithms to detect whether the app is piggybacked or benign. DAPASA is evaluated on a large real-world data set consisting of 2551 piggybacked apps and 44 921 popular benign apps. Extensive evaluation results demonstrate that the proposed approach exhibits an impressive detection performance compared with that of three baseline approaches even with only five numeric features. Furthermore, the proposed approach can complement permission-based approaches and API-based approaches with the combination of our five features from a new perspective of the invocation structure.},
	language = {en},
	number = {8},
	urldate = {2018-04-11},
	journal = {IEEE Transactions on Information Forensics and Security},
	author = {Fan, Ming and Liu, Jun and Wang, Wei and Li, Haifei and Tian, Zhenzhou and Liu, Ting},
	month = aug,
	year = {2017},
	keywords = {detection, static, piggybacking, vt-features},
	pages = {1772--1785},
	file = {Fan et al. - 2017 - DAPASA Detecting Android Piggybacked Apps Through.pdf:/home/fmind/Documents/Zotero/storage/NGJMSIRA/Fan et al. - 2017 - DAPASA Detecting Android Piggybacked Apps Through.pdf:application/pdf}
}

@inproceedings{li_automatically_2017,
	title = {Automatically {Locating} {Malicious} {Packages} in {Piggybacked} {Android} {Apps}},
	isbn = {978-1-5386-2669-6},
	url = {http://ieeexplore.ieee.org/document/7972732/},
	doi = {10.1109/MOBILESoft.2017.6},
	abstract = {To devise efficient approaches and tools for detecting malicious packages in the Android ecosystem, researchers are increasingly required to have a deep understanding of malware. There is thus a need to provide a framework for dissecting malware and locating malicious program fragments within app code in order to build a comprehensive dataset of malicious samples. Towards addressing this need, we propose in this work a tool-based approach called HookRanker, which provides ranked lists of potentially malicious packages based on the way malware behaviour code is triggered. With experiments on a ground truth set of piggybacked apps, we are able to automatically locate the malicious packages from piggybacked Android apps with an accuracy of 83.6\% in verifying the top five reported items.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {2017 {IEEE}/{ACM} 4th {International} {Conference} on {Mobile} {Software} {Engineering} and {Systems} ({MOBILESoft})},
	publisher = {IEEE},
	author = {Li, Li and Li, Daoyuan and Bissyandé, Tegawendé F. and Klein, Jacques and Cai, Haipeng and Lo, David and Le Traon, Yves},
	month = may,
	year = {2017},
	keywords = {static, piggybacking, vt-features},
	pages = {170--174},
	file = {Li et al. - 2017 - Automatically Locating Malicious Packages in Piggy.pdf:/home/fmind/Documents/Zotero/storage/3SD28GP6/Li et al. - 2017 - Automatically Locating Malicious Packages in Piggy.pdf:application/pdf}
}

@article{suarez-tangil_evolution_2014,
	author = {Suarez-Tangil, Guillermo and Tapiador, Juan E. and Peris-Lopez, Pedro and Ribagorda, Arturo},
	title = {Evolution, {Detection} and {Analysis} of {Malware} for {Smart} {Devices}},
	journal = {IEEE Communications Surveys \& Tutorials},
	year = {2014},
	volume = {16},
	number = {2},
	pages = {961--987},
	issn = {1553-877X},
	doi = {10.1109/SURV.2013.101613.00077},
	url = {http://ieeexplore.ieee.org/document/6657497/},
	urldate = {2018-04-11},
	language = {en},
	keywords = {survey, evolution},
	abstract = {Smart devices equipped with powerful sensing, computing and networking capabilities have proliferated lately, ranging from popular smartphones and tablets to Internet appliances, smart TVs, and others that will soon appear (e.g., watches, glasses, and clothes). One key feature of such devices is their ability to incorporate third-party apps from a variety of markets. This poses strong security and privacy issues to users and infrastructure operators, particularly through software of malicious (or dubious) nature that can easily get access to the services provided by the device and collect sensory data and personal information. Malware in current smart devices –mostly smartphones and tablets– have rocketed in the last few years, in some cases supported by sophisticated techniques purposely designed to overcome security architectures currently in use by such devices. Even though important advances have been made on malware detection in traditional personal computers during the last decades, adopting and adapting those techniques to smart devices is a challenging problem. For example, power consumption is one major constraint that makes unaffordable to run traditional detection engines on the device, while externalized (i.e., cloud-based) techniques rise many privacy concerns.},
	file = {Suarez-Tangil et al. - 2014 - Evolution, Detection and Analysis of Malware for S.pdf:/home/fmind/Documents/Zotero/storage/8LVUPYWT/Suarez-Tangil et al. - 2014 - Evolution, Detection and Analysis of Malware for S.pdf:application/pdf}
}

@incollection{monrose_avclass:_2016,
	address = {Cham},
	title = {{AVclass}: {A} {Tool} for {Massive} {Malware} {Labeling}},
	series = {Lecture {Notes} in {Computer} {Science}},
	volume = {9854},
	isbn = {978-3-319-45718-5 978-3-319-45719-2},
	shorttitle = {{AVclass}},
	url = {http://link.springer.com/10.1007/978-3-319-45719-2_11},
	abstract = {Labeling a malicious executable as a variant of a known family is important for security applications such as triage, lineage, and for building reference datasets in turn used for evaluating malware clustering and training malware classification approaches. Oftentimes, such labeling is based on labels output by antivirus engines. While AV labels are well-known to be inconsistent, there is often no other information available for labeling, thus security analysts keep relying on them. However, current approaches for extracting family information from AV labels are manual and inaccurate. In this work, we describe AVCLASS, an automatic labeling tool that given the AV labels for a, potentially massive, number of samples outputs the most likely family names for each sample. AVCLASS implements novel automatic techniques to address 3 key challenges: normalization, removal of generic tokens, and alias detection. We have evaluated AVCLASS on 10 datasets comprising 8.9 M samples, larger than any dataset used by malware clustering and classification works. AVCLASS leverages labels from any AV engine, e.g., all 99 AV engines seen in VirusTotal, the largest engine set in the literature. AVCLASS’s clustering achieves F1 measures up to 93.9 on labeled datasets and clusters are labeled with fine-grained family names commonly used by the AV vendors. We release AVCLASS to the community.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {Research in {Attacks}, {Intrusions}, and {Defenses}},
	publisher = {Springer International Publishing},
	author = {Sebastián, Marcos and Rivera, Richard and Kotzias, Platon and Caballero, Juan},
	editor = {Monrose, Fabian and Dacier, Marc and Blanc, Gregory and Garcia-Alfaro, Joaquin},
	year = {2016},
	doi = {10.1007/978-3-319-45719-2_11},
	keywords = {vt-labels},
	pages = {230--253},
	file = {Sebastián et al. - 2016 - AVclass A Tool for Massive Malware Labeling.pdf:/home/fmind/Documents/Zotero/storage/9YVTC5QL/Sebastián et al. - 2016 - AVclass A Tool for Massive Malware Labeling.pdf:application/pdf}
}

@inproceedings{gashi_study_2013,
	title = {A study of the relationship between antivirus regressions and label changes},
	isbn = {978-1-4799-2366-3},
	url = {http://ieeexplore.ieee.org/document/6698897/},
	doi = {10.1109/ISSRE.2013.6698897},
	abstract = {AntiVirus (AV) products use multiple components to detect malware. A component which is found in virtually all AVs is the signature-based detection engine: this component assigns a particular signature label to a malware that the AV detects. In previous analysis [1-3], we observed cases of regressions in several different AVs: i.e. cases where on a particular date a given AV detects a given malware but on a later date the same AV fails to detect the same malware. We studied this aspect further by analyzing the only externally observable behaviors from these AVs, namely whether AV engines detect a malware and what labels they assign to the detected malware. In this paper we present the results of the analysis about the relationship between the changing of the labels with which AV vendors recognize malware and the AV regressions.},
	language = {en},
	urldate = {2018-04-11},
	booktitle = {2013 {IEEE} 24th {International} {Symposium} on {Software} {Reliability} {Engineering} ({ISSRE})},
	publisher = {IEEE},
	author = {Gashi, Ilir and Sobesto, Bertrand and Mason, Stephen and Stankovic, Vladimir and Cukier, Michel},
	month = nov,
	year = {2013},
	pages = {441--450},
	file = {Gashi et al. - 2013 - A study of the relationship between antivirus regr.pdf:/home/fmind/Documents/Zotero/storage/2TY2MIDJ/Gashi et al. - 2013 - A study of the relationship between antivirus regr.pdf:application/pdf}
}

@article{dice_measures_1945,
	title = {Measures of the {Amount} of {Ecologic} {Association} {Between} {Species}},
	volume = {26},
	issn = {0012-9658},
	url = {http://doi.wiley.com/10.2307/1932409},
	doi = {10.2307/1932409},
	language = {en},
	number = {3},
	urldate = {2018-04-11},
	journal = {Ecology},
	author = {Dice, Lee R.},
	month = jul,
	year = {1945},
	pages = {297--302},
	file = {Dice - 1945 - Measures of the Amount of Ecologic Association Bet.pdf:/home/fmind/Documents/Zotero/storage/FYY9G6DH/Dice - 1945 - Measures of the Amount of Ecologic Association Bet.pdf:application/pdf}
}

% Workshop paper: typed as inproceedings so the venue is printed; the page
% count lives in pagetotal because it is not a page range.
@inproceedings{cohen_comparison_2003,
	author = {Cohen, William W. and Ravikumar, Pradeep and Fienberg, Stephen E.},
	title = {A {Comparison} of {String} {Metrics} for {Matching} {Names} and {Records}},
	booktitle = {Proceedings of the {KDD} {Workshop} on {Data} {Cleaning} and {Object} {Consolidation}},
	year = {2003},
	pagetotal = {6},
	language = {en},
	abstract = {We describe an open-source Java toolkit of methods for matching names and records. We summarize results obtained from using various string distance metrics on the task of matching entity names. These metrics include distance functions proposed by several different communities, such as edit-distance metrics, fast heuristic string comparators, token-based distance metrics, and hybrid methods. We then describe an extension to the toolkit which allows records to be compared. We discuss some issues involved in performing a similar comparison for record-matching techniques, and finally present results for some baseline record-matching algorithms that aggregate string comparisons between fields.},
	file = {Cohen et al. - A Comparison of String Metrics for Matching Names .pdf:/home/fmind/Documents/Zotero/storage/ZMLIGRHN/Cohen et al. - A Comparison of String Metrics for Matching Names .pdf:application/pdf}
}

@article{prim_shortest_1957,
	author = {Prim, R. C.},
	title = {Shortest {Connection} {Networks} {And} {Some} {Generalizations}},
	journal = {Bell System Technical Journal},
	volume = {36},
	number = {6},
	pages = {1389--1401},
	month = nov,
	year = {1957},
	issn = {00058580},
	doi = {10.1002/j.1538-7305.1957.tb01515.x},
	url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6773228},
	urldate = {2018-04-11},
	language = {en},
	file = {Prim - 1957 - Shortest Connection Networks And Some Generalizati.pdf:/home/fmind/Documents/Zotero/storage/ZUZPC5VC/Prim - 1957 - Shortest Connection Networks And Some Generalizati.pdf:application/pdf}
}

@misc{ahn_how_2018,
	author = {Ahn, Andrew},
	title = {How we fought bad apps and malicious developers in 2017},
	journal = {How we fought bad apps and malicious developers in 2017},
	month = jan,
	year = {2018},
	url = {https://android-developers.googleblog.com/2018/01/how-we-fought-bad-apps-and-malicious.html},
	urldate = {2018-04-01},
	language = {en},
	note = {https://android-developers.googleblog.com/2018/01/how-we-fought-bad-apps-and-malicious.html}
}

% arXiv preprint: cited CoRR-style so the required journal field is present.
% The page count is kept in pagetotal because it is not a page range.
@article{li_androzoo++:_2017,
	author = {Li, Li and Gao, Jun and Hurier, Médéric and Kong, Pingfan and Bissyandé, Tegawendé F. and Bartel, Alexandre and Klein, Jacques and Le Traon, Yves},
	title = {{AndroZoo}++: {Collecting} {Millions} of {Android} {Apps} and {Their} {Metadata} for the {Research} {Community}},
	journal = {CoRR},
	volume = {abs/1709.05281},
	month = sep,
	year = {2017},
	pagetotal = {21},
	url = {http://arxiv.org/abs/1709.05281},
	language = {en},
	abstract = {We present a growing collection of Android apps collected from several sources, including the official Google Play app market and a growing collection of various metadata of those collected apps aiming at facilitating the Android-relevant research works. Our dataset by far has collected over five million apps and over 20 types of metadata such as VirusTotal reports. Our objective of collecting this dataset is to contribute to ongoing research efforts, as well as to enable new potential research topics on Android Apps. By releasing our app and metadata set to the research community, we also aim at encouraging our fellow researchers to engage in reproducible experiments.},
	file = {Li et al. - AndroZoo++ Collecting Millions of Android Apps an.pdf:/home/fmind/Documents/Zotero/storage/PHKAH5YQ/Li et al. - AndroZoo++ Collecting Millions of Android Apps an.pdf:application/pdf}
}

@inproceedings{bianchi_what_2015,
	author = {Bianchi, Antonio and Corbetta, Jacopo and Invernizzi, Luca and Fratantonio, Yanick and Kruegel, Christopher and Vigna, Giovanni},
	title = {What the {App} is {That}? {Deception} and {Countermeasures} in the {Android} {User} {Interface}},
	shorttitle = {What the {App} is {That}?},
	booktitle = {2015 {IEEE} {Symposium} on {Security} and {Privacy}},
	pages = {931--948},
	month = may,
	year = {2015},
	publisher = {IEEE},
	isbn = {978-1-4673-6949-7},
	doi = {10.1109/SP.2015.62},
	url = {http://ieeexplore.ieee.org/document/7163069/},
	urldate = {2018-04-11},
	language = {en},
	abstract = {Mobile applications are part of the everyday lives of billions of people, who often trust them with sensitive information. These users identify the currently focused app solely by its visual appearance, since the GUIs of the most popular mobile OSes do not show any trusted indication of the app origin.},
	file = {Bianchi et al. - 2015 - What the App is That Deception and Countermeasure.pdf:/home/fmind/Documents/Zotero/storage/BSSGQB8F/Bianchi et al. - 2015 - What the App is That Deception and Countermeasure.pdf:application/pdf}
}

@misc{tetali_keeping_2018,
	author = {Tetali, Sai},
	title = {Keeping 2 billion {Android} devices safe with machine learning},
	month = may,
	year = {2018},
	url = {https://security.googleblog.com/2018/05/keeping-2-billion-android-devices-safe.html},
	urldate = {2018-05-18},
	language = {en},
	keywords = {detection, official}
}

@misc{statcounter_mobile_2019,
	author = {statcounter},
	title = {Mobile {Operating} {System} {Market} {Share} {Worldwide}},
	journal = {Mobile Operating System Market Share Worldwide},
	year = {2019},
	url = {http://gs.statcounter.com/os-market-share/mobile/worldwide/},
	urldate = {2019-01-01},
	note = {http://gs.statcounter.com/os-market-share/mobile/worldwide/}
}

@misc{statista_number_2019,
	author = {Statista},
	title = {Number of available applications in the {Google} {Play} {Store} from {December} 2009 to {December} 2018},
	journal = {Number of available applications in the Google Play Store from December 2009 to December 2018},
	year = {2019},
	url = {https://www.statista.com/statistics/266210/number-of-available-applications-in-the-google-play-store/},
	urldate = {2019-01-01},
	note = {https://www.statista.com/statistics/266210/number-of-available-applications-in-the-google-play-store/}
}

@misc{verizon_2018_2019,
	author = {Verizon},
	title = {2018 {Data} {Breach} {Investigations} {Report}},
	journal = {2018 Data Breach Investigations Report},
	year = {2019},
	url = {https://enterprise.verizon.com/resources/reports/dbir/},
	urldate = {2019-01-01},
	note = {https://enterprise.verizon.com/resources/reports/dbir/}
}

@misc{leskin_21_2018,
	author = {Leskin, Paige},
	title = {The 21 scariest data breaches of 2018},
	journal = {The 21 scariest data breaches of 2018},
	month = dec,
	year = {2018},
	url = {https://www.businessinsider.fr/us/data-hacks-breaches-biggest-of-2018-2018-12},
	urldate = {2019-01-01},
	note = {https://www.businessinsider.fr/us/data-hacks-breaches-biggest-of-2018-2018-12}
}

@misc{google_android_2018,
	author = {Google},
	title = {Android {Security} 2017 {Year} {In} {Review}},
	journal = {Android Security 2017 Year In Review},
	year = {2018},
	url = {https://source.android.com/security/reports/Google_Android_Security_2017_Report_Final.pdf},
	urldate = {2019-01-01},
	file = {Full Text:/home/fmind/Documents/Zotero/storage/7A49D4Y7/Google - 2018 - Android Security 2017 Year In Review.pdf:application/pdf}
}

@misc{sophos_sophoslabs_2019,
	author = {Sophos},
	title = {{SophosLabs} 2019 - {Threat} {Report}},
	journal = {SophosLabs 2019 - Threat Report},
	year = {2019},
	url = {https://www.sophos.com/en-us/medialibrary/pdfs/technical-papers/sophoslabs-2019-threat-report.pdf},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{upstream_systems_secure-d_2019,
	author = {{Upstream Systems}},
	title = {Secure-{D} uncovers pre-installed malware com.tct.weather on {Alcatel} {Android} smartphones manufactured by {TCL}},
	journal = {Secure-D uncovers pre-installed malware com.tct.weather on Alcatel Android smartphones manufactured by TCL},
	month = jan,
	year = {2019},
	url = {https://www.upstreamsystems.com/secure-d-uncovers-pre-installed-malware-alcatel-android-smartphones-manufactured-tcl/},
	urldate = {2019-01-01}
}

@misc{google_permissions_2019,
	author = {Google},
	title = {Permissions overview},
	journal = {Permissions overview},
	year = {2019},
	url = {https://developer.android.com/guide/topics/permissions/overview},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{oxford_dictionnary_malware_2019,
	author = {{Oxford Dictionaries}},
	title = {Malware},
	journal = {Malware Definition},
	year = {2019},
	url = {https://en.oxforddictionaries.com/definition/malware},
	urldate = {2019-01-01}
}

@misc{symantec_android.kuguo_2019,
	author = {Symantec},
	title = {Android.{Kuguo}},
	journal = {Android.Kuguo},
	year = {2019},
	url = {https://www.symantec.com/security-center/writeup/2014-040315-5215-99},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the organisation name
% as a single unit.
@misc{ics2_cybersecurity_2018,
	author = {{(ISC)$^2$}},
	title = {Cybersecurity skills shortage soars, nearing 3 million},
	journal = {Cybersecurity skills shortage soars, nearing 3 million},
	month = oct,
	year = {2018},
	url = {https://blog.isc2.org/isc2_blog/2018/10/cybersecurity-skills-shortage-soars-nearing-3-million.html},
	urldate = {2019-01-01}
}

% Month follows the publication date embedded in the URL path (2018/08/09).
@misc{forbes_cybersecurity_2018,
	author = {Forbes},
	title = {The {Cybersecurity} {Talent} {Gap} {Is} {An} {Industry} {Crisis}},
	journal = {The Cybersecurity Talent Gap Is An Industry Crisis},
	month = aug,
	year = {2018},
	url = {https://www.forbes.com/sites/forbestechcouncil/2018/08/09/the-cybersecurity-talent-gap-is-an-industry-crisis/},
	urldate = {2019-01-01}
}

@misc{av-test_malware_2019,
	author = {AV-TEST},
	title = {Malware {Statistics}},
	journal = {Malware Statistics},
	year = {2019},
	url = {https://www.av-test.org/en/statistics/malware/},
	urldate = {2019-01-01}
}

@misc{datafloq_machine_2019,
	author = {DataFloq},
	title = {Machine {Learning} {Explained}},
	journal = {Machine Learning Explained},
	year = {2019},
	url = {https://datafloq.com/read/machine-learning-explained-understanding-learning/4478},
	urldate = {2019-01-01}
}

@book{bellman_dynamic_2013,
	author = {Bellman, Richard},
	title = {Dynamic programming},
	year = {2013},
	publisher = {Courier Corporation}
}

@book{csikszentmihalyi_flow_2014,
	author = {Csikszentmihalyi, Mihaly and Larson, R},
	title = {Flow and the foundations of positive psychology},
	year = {2014},
	publisher = {Springer}
}

@misc{solvusoft_how_nodate,
	author = {Solvusoft},
	title = {How to {Remove} {Android}:{AccuTrack}-{A}},
	journal = {https://www.solvusoft.com/en/malware/potentially-unwanted-application/android-accutrack-a/},
	url = {https://www.solvusoft.com/en/malware/potentially-unwanted-application/android-accutrack-a/},
	urldate = {2019-01-01}
}

@article{pfitzner_characterization_2009,
	author = {Pfitzner, Darius and Leibbrandt, Richard and Powers, David},
	title = {Characterization and evaluation of similarity measures for pairs of clusterings},
	journal = {Knowledge and Information Systems},
	volume = {19},
	number = {3},
	pages = {361--394},
	year = {2009},
	issn = {02191377},
	doi = {10.1007/s10115-008-0150-6},
	keywords = {Cluster comparison, Clustering, Evaluation, large-scale-analysis, Review, Similarity measures},
	abstract = {In evaluating the results of cluster analysis, it is common practice to make use of a number of fixed heuristics rather than to compare a data clustering directly against an empirically derived standard, such as a clustering empirically obtained from human informants. Given the dearth of research into techniques to express the similarity between clusterings, there is broad scope for fundamental research in this area. In defining the comparative problem, we identify two types of worst-case matches between pairs of clusterings, characterised as independently codistributed clustering pairs and conjugate partition pairs. Desirable behaviour for a similarity measure in either of the two worst cases is discussed, giving rise to five test scenarios in which characteristics of one of a pair of clusterings was manipulated in order to compare and contrast the behaviour of different clustering similarity measures. This comparison is carried out for previously-proposed clustering similarity measures, as well as a number of established similarity measures that have not previously been applied to clustering comparison. We introduce a paradigm apparatus for the evaluation of clustering comparison techniques and distinguish between the goodness of clusterings and the similarity of clusterings by clarifying the degree to which different measures confuse the two. Accompanying this is the proposal of a novel clustering similarity measure, the Measure of Concordance ({MoC}). We show that only {MoC}, Powers’s measure, Lopez and Rajski’s measure and various forms of Normalised Mutual Information exhibit the desired behaviour under each of the test scenarios.}
}

% Typed as misc because no publisher or year is recorded for this item, and
% both are required for a book entry.
@misc{hurier_definition_nodate,
	author = {Hurier, Médéric},
	title = {Definition of {Ouroboros}}
}

% Vendor report: type and number render as the report label "ISTR 20".
@techreport{symantec_symantec._2015,
	author = {{Symantec}},
	title = {Internet {Security} {Threat} {Report}},
	type = {{ISTR}},
	number = {20},
	institution = {Symantec},
	month = apr,
	year = {2015}
}

% Blog post, so it is typed as misc rather than book.
@misc{kleidermacher_android_2018,
	author = {Kleidermacher, Dave},
	title = {Android {Security} 2017 {Year} in {Review}},
	month = mar,
	year = {2018},
	url = {https://security.googleblog.com/2018/03/android-security-2017-year-in-review.html},
	urldate = {2018-04-01},
	language = {en}
}

@article{arzt_flowdroid:_2014,
	author = {Arzt, Steven and Rasthofer, Siegfried and Fritz, Christian and Bodden, Eric and Bartel, Alexandre and Klein, Jacques and Le Traon, Yves and Octeau, Damien and McDaniel, Patrick},
	title = {{FlowDroid}: {Precise} context, flow, field, object-sensitive and lifecycle-aware taint analysis for {Android} apps},
	journal = {{ACM} {SIGPLAN} {Notices}},
	volume = {49},
	number = {6},
	pages = {259--269},
	year = {2014}
}

@inproceedings{avdiienko_mining_2015,
	author = {Avdiienko, Vitalii and Kuznetsov, Konstantin and Gorla, Alessandra and Zeller, Andreas and Arzt, Steven and Rasthofer, Siegfried and Bodden, Eric},
	title = {Mining apps for abnormal usage of sensitive data},
	booktitle = {Proceedings of the 37th {International} {Conference} on {Software} {Engineering}-{Volume} 1},
	pages = {426--436},
	year = {2015},
	publisher = {IEEE Press}
}

@inproceedings{spreitzer_procharvester:_2018,
	author = {Spreitzer, Raphael and Kirchengast, Felix and Gruss, Daniel and Mangard, Stefan},
	title = {{ProcHarvester}: {Fully} automated analysis of procfs side-channel leaks on {Android}},
	booktitle = {Proceedings of the 2018 on {Asia} {Conference} on {Computer} and {Communications} {Security}},
	pages = {749--763},
	year = {2018},
	publisher = {ACM}
}

@inproceedings{kiss_kharon_2016,
	author = {Kiss, Nicolas and Lalande, Jean-François and Leslous, Mourad and Viet Triem Tong, Valérie},
	title = {Kharon dataset: {Android} malware under a microscope},
	booktitle = {Learning from {Authoritative} {Security} {Experiment} {Results}},
	month = may,
	year = {2016},
	publisher = {The USENIX Association},
	address = {San Jose, United States},
	url = {https://hal-univ-orleans.archives-ouvertes.fr/hal-01300752},
	keywords = {malware}
}

@article{li_static_2017,
	author = {Li, Li and Bissyandé, Tegawendé F. and Papadakis, Mike and Rasthofer, Siegfried and Bartel, Alexandre and Octeau, Damien and Klein, Jacques and Le Traon, Yves},
	title = {Static analysis of {Android} apps: {A} systematic literature review},
	journal = {Information and Software Technology},
	volume = {88},
	pages = {67--95},
	year = {2017}
}

@article{geiger_datasets_2018,
	author = {Geiger, Franz-Xaver and Malavolta, Ivano},
	title = {Datasets of {Android} {Applications}: a {Literature} {Review}},
	journal = {arXiv preprint arXiv:1809.10069},
	year = {2018}
}

@article{lipton_mythos_2018,
	author = {Lipton, Zachary Chase},
	title = {The {Mythos} of {Model} {Interpretability}},
	journal = {ACM Queue},
	volume = {16},
	pages = {30},
	year = {2018}
}

@article{murdoch_interpretable_2019,
	author = {Murdoch, W James and Singh, Chandan and Kumbier, Karl and Abbasi-Asl, Reza and Yu, Bin},
	title = {Interpretable machine learning: definitions, methods, and applications},
	journal = {arXiv preprint arXiv:1901.04592},
	year = {2019}
}

@article{doshi-velez_towards_2017,
	author = {Doshi-Velez, Finale and Kim, Been},
	title = {Towards a rigorous science of interpretable machine learning},
	journal = {arXiv preprint arXiv:1702.08608},
	year = {2017}
}

@incollection{lundberg_unified_2017,
	author = {Lundberg, Scott M and Lee, Su-In},
	title = {A {Unified} {Approach} to {Interpreting} {Model} {Predictions}},
	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 30},
	editor = {Guyon, I. and Luxburg, U. V. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.},
	pages = {4765--4774},
	year = {2017},
	publisher = {Curran Associates, Inc.},
	url = {http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf}
}

% The quoted phrase uses TeX quote ligatures, braced as one group so styles
% keep its capitalisation.
@inproceedings{ribeiro_why_2016,
	author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
	title = {{``Why Should I Trust You?''}: {Explaining} the {Predictions} of {Any} {Classifier}},
	booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}, {San} {Francisco}, {CA}, {USA}, {August} 13--17, 2016},
	pages = {1135--1144},
	year = {2016}
}

% Agency web publication, not a journal article: the issuing body is recorded
% in howpublished.
@misc{gunning_explainable_2017,
	author = {Gunning, David},
	title = {Explainable {Artificial} {Intelligence} ({XAI})},
	howpublished = {Defense Advanced Research Projects Agency ({DARPA})},
	year = {2017}
}

@misc{circl_misp_nodate,
	author = {CIRCL},
	title = {{MISP} - {Open} {Source} {Threat} {Intelligence} {Platform} \& {Open} {Standards} {For} {Threat} {Information} {Sharing}},
	journal = {MISP - Open Source Threat Intelligence Platform \& Open Standards For Threat Information Sharing},
	url = {https://www.misp-project.org/},
	urldate = {2019-01-01}
}

@misc{diop_explainable_nodate,
	author = {Diop, Mouhamadou-Lamine},
	title = {Explainable {AI}: {The} data scientists’ new challenge},
	journal = {Explainable AI: The data scientists’ new challenge},
	url = {https://towardsdatascience.com/explainable-ai-the-data-scientists-new-challenge-f7cac935a5b4},
	urldate = {2019-01-01}
}

@inproceedings{dosilovic_explainable_2018,
	author = {Došilović, F. K. and Brčić, M. and Hlupić, N.},
	title = {Explainable artificial intelligence: {A} survey},
	booktitle = {2018 41st {International} {Convention} on {Information} and {Communication} {Technology}, {Electronics} and {Microelectronics} ({MIPRO})},
	pages = {210--215},
	month = may,
	year = {2018},
	doi = {10.23919/MIPRO.2018.8400040},
	keywords = {(super)human performance, artificial general intelligence, comprehensibility, computing power, datasets, Decision trees, explainability, explainable artificial intelligence, finance, healthcare, image recognition, interpretability, learning (artificial intelligence), Machine learning, machine learning systems, Optimization, Predictive models, recent developments, speech analysis, state-of-the-art models, strategic game planning, supervised learning, Supervised learning, Support vector machines, transparency, XAI}
}

@article{adadi_peeking_2018,
	author = {Adadi, A. and Berrada, M.},
	title = {Peeking {Inside} the {Black}-{Box}: {A} {Survey} on {Explainable} {Artificial} {Intelligence} ({XAI})},
	journal = {IEEE Access},
	volume = {6},
	pages = {52138--52160},
	year = {2018},
	issn = {2169-3536},
	doi = {10.1109/ACCESS.2018.2870052},
	keywords = {explainable artificial intelligence, Machine learning, XAI, AI-based systems, artificial intelligence, Biological system modeling, black-box models, black-box nature, Conferences, explainable AI, Explainable artificial intelligence, fourth industrial revolution, interpretable machine learning, Machine learning algorithms, Market research, Prediction algorithms},
	abstract = {At the dawn of the fourth industrial revolution, we are witnessing a fast and widespread adoption of artificial intelligence (AI) in our daily life, which contributes to accelerating the shift towards a more algorithmic society. However, even with such unprecedented advancements, a key impediment to the use of AI-based systems is that they often lack transparency. Indeed, the black-box nature of these systems allows powerful predictions, but it cannot be directly explained. This issue has triggered a new debate on explainable AI (XAI). A research field holds substantial promise for improving trust and transparency of AI-based systems. It is recognized as the sine qua non for AI to continue making steady progress without disruption. This survey provides an entry point for interested researchers and practitioners to learn key aspects of the young and rapidly growing body of research related to XAI. Through the lens of the literature, we review the existing approaches regarding the topic, discuss trends surrounding its sphere, and present major research trajectories.}
}

@article{shrikumar_learning_2017,
	author = {Shrikumar, Avanti and Greenside, Peyton and Kundaje, Anshul},
	title = {Learning {Important} {Features} {Through} {Propagating} {Activation} {Differences}},
	journal = {CoRR},
	volume = {abs/1704.02685},
	year = {2017},
	url = {http://arxiv.org/abs/1704.02685}
}

@article{tramer_ensemble_2017,
	author = {Tramèr, Florian and Kurakin, Alexey and Papernot, Nicolas and Goodfellow, Ian and Boneh, Dan and McDaniel, Patrick},
	title = {Ensemble adversarial training: {Attacks} and defenses},
	journal = {arXiv preprint arXiv:1705.07204},
	year = {2017}
}

@article{athalye_obfuscated_2018,
	author = {Athalye, Anish and Carlini, Nicholas and Wagner, David A.},
	title = {Obfuscated {Gradients} {Give} a {False} {Sense} of {Security}: {Circumventing} {Defenses} to {Adversarial} {Examples}},
	journal = {CoRR},
	volume = {abs/1802.00420},
	year = {2018},
	url = {http://arxiv.org/abs/1802.00420}
}

% Corporate author: the extra braces keep the company name as a single unit.
@misc{cognitech_datomic_nodate,
	author = {{Cognitect}},
	title = {Datomic {Website}},
	journal = {Datomic Website},
	url = {https://www.datomic.com/},
	urldate = {2019-01-01}
}

@misc{elastic_elastic_nodate,
	author = {Elastic},
	title = {Elastic {Website}},
	journal = {Elastic Website},
	url = {https://www.elastic.co},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_source_android_nodate,
	author = {{Android Source}},
	title = {Android {Framework}},
	journal = {Security},
	url = {https://source.android.com/security/},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_website_android_2019,
	author = {{Android Website}},
	title = {Android {Security}},
	journal = {Android Security},
	month = jan,
	year = {2019},
	url = {https://www.android.com/enterprise/security/}
}

@misc{android_android_2019,
	author = {Android},
	title = {Android {History}},
	journal = {Android History},
	month = jan,
	year = {2019},
	url = {https://www.android.com/history/}
}

@misc{vaseekaran_machine_2018,
	author = {Vaseekaran, Gowthamy},
	title = {Machine {Learning}: {Supervised} {Learning} vs {Unsupervised} {Learning}},
	journal = {Machine Learning: Supervised Learning vs Unsupervised Learning},
	month = sep,
	year = {2018},
	url = {https://medium.com/@gowthamy/machine-learning-supervised-learning-vs-unsupervised-learning-f1658e12a780},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_developers_announcing_2008,
	author = {{Android Developers}},
	title = {Announcing the {Android} 1.0 {SDK}, release 1},
	journal = {Announcing the Android 1.0 SDK, release 1},
	month = sep,
	year = {2008},
	url = {https://android-developers.googleblog.com/2008/09/announcing-android-10-sdk-release-1.html},
	urldate = {2019-01-01}
}

@misc{android_android_2019-1,
	author = {Android},
	title = {Android {Content} {License}},
	journal = {Android Content License},
	month = jan,
	year = {2019},
	url = {https://source.android.com/setup/start/licenses},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{open_hub_android_2019,
	author = {{Open Hub}},
	title = {Android {Language} {Breakdown}},
	journal = {Android Language Breakdown},
	month = jan,
	year = {2019},
	url = {https://www.openhub.net/p/android/analyses/latest/languages_summary},
	urldate = {2019-01-01}
}

@misc{google_google_2019,
	author = {Google},
	title = {Google {Play}},
	journal = {Google Play},
	month = jan,
	year = {2019},
	url = {https://play.google.com/store},
	urldate = {2019-01-01}
}

@misc{google_google_2019-1,
	author = {Google},
	title = {Google {Play} {Protect}},
	journal = {Google Play Protect},
	month = jan,
	year = {2019},
	url = {https://www.android.com/play-protect/},
	urldate = {2019-01-01}
}

@misc{android_developer_2019,
	author = {Android},
	title = {Developer {Policy} {Center}},
	journal = {Developer Policy Center},
	month = jan,
	year = {2019},
	url = {https://play.google.com/about/developer-content-policy/},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_developers_intents_2019,
	author = {{Android Developers}},
	title = {Intents and {Intent} {Filters}},
	journal = {Intents and Intent Filters},
	month = jan,
	year = {2019},
	url = {https://developer.android.com/guide/components/intents-filters},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_developers_application_2019,
	author = {{Android Developers}},
	title = {Application {Sandbox}},
	journal = {Application Sandbox},
	month = jan,
	year = {2019},
	url = {https://source.android.com/security/app-sandbox},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_developers_security_2019,
	author = {{Android Developers}},
	title = {Security {Essential} {Checklist}},
	journal = {Security Essential Checklist},
	month = jan,
	year = {2019},
	url = {https://developer.android.com/topic/security},
	urldate = {2019-01-01}
}

% Corporate author: the extra braces make BibTeX treat the name as a single
% unit instead of splitting it into given and family names.
@misc{android_developers_application_2019-1,
	author = {{Android Developers}},
	title = {Application {Security}},
	journal = {Application Security},
	month = jan,
	year = {2019},
	url = {https://source.android.com/security/overview/app-security},
	urldate = {2019-01-01}
}

@misc{ozaki_decision_2015,
	author = {Ozaki, Takashi},
	title = {Decision {Boundaries} for {Deep} {Learning} and other {Machine} {Learning} classifiers},
	journal = {Decision Boundaries for Deep Learning and other Machine Learning classifiers},
	month = jun,
	year = {2015},
	url = {https://www.kdnuggets.com/2015/06/decision-boundaries-deep-learning-machine-learning-classifiers.html},
	urldate = {2019-01-01}
}

@article{zou_regularization_2005,
	author = {Zou, Hui and Hastie, Trevor},
	title = {Regularization and variable selection via the elastic net},
	journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	volume = {67},
	number = {2},
	pages = {301--320},
	year = {2005}
}

% No trailing period in the title: bibliography styles add their own
% terminal punctuation.
@misc{bhande_what_2018,
	author = {Bhande, Anup},
	title = {What is underfitting and overfitting in machine learning and how to deal with it},
	journal = {What is underfitting and overfitting in machine learning and how to deal with it},
	month = may,
	year = {2018},
	url = {https://medium.com/greyatom/what-is-underfitting-and-overfitting-in-machine-learning-and-how-to-deal-with-it-6803a989c76},
	urldate = {2019-01-01}
}