@comment{reliTest.bib — removed scraped GitHub page chrome and line-number gutter (web-extraction artifact, not part of the bibliography).}
@inproceedings{SASSIFI,
abstract = {As GPUs become more pervasive in both scalable high-performance computing systems and safety-critical embedded systems, evaluating and analyzing their resilience to soft errors caused by high-energy particle strikes will grow increasingly important. GPU designers must develop tools and techniques to understand the effect of these soft errors on applications. This paper presents an error injection-based methodology and tool called SASSIFI to study the soft error resilience of massively parallel applications running on stateof-The-Art NVIDIA GPUs. Our approach uses a low-level assembly-language instrumentation tool called SASSI to profile and inject errors. SASSI provides efficiency by allowing instrumentation code to execute entirely on the GPU and provides the ability to inject into different architecture-visible state. For example, SASSIFI can inject errors in general-purpose registers, GPU memory, condition code registers, and predicate registers. SASSIFI can also inject errors into addresses and register indices. In this paper, we describe the SASSIFI tool, its capabilities, and present experiments to illustrate some of the analyses SASSIFI can be used to perform.},
author = {Hari, Siva Kumar Sastry and Tsai, Timothy and Stephenson, Mark and Keckler, Stephen W. and Emer, Joel},
booktitle = {ISPASS 2017 - IEEE International Symposium on Performance Analysis of Systems and Software},
doi = {10.1109/ISPASS.2017.7975296},
file = {:C\:/Users/fffas/Documents/Readspace/paper/2017-ISPASS-SASSIFI.pdf:pdf},
isbn = {9781538638897},
mendeley-groups = {understand_prop,autosurvey},
pages = {249--258},
title = {{SASSIFI}: An Architecture-Level Fault Injection Tool for {GPU} Application Resilience Evaluation},
year = {2017}
}
@inproceedings{Chaudhuri2021,
abstract = {Owing to the inherent fault tolerance of deep neural networks (DNNs), many structural faults in DNN accelerators tend to be functionally benign. In order to identify functionally critical faults, we analyze the functional impact of stuck-at faults in the processing elements of a 128×128 systolic-array accelerator that performs inferencing on the MNIST dataset. We present a 2-tier machine-learning framework that leverages graph convolutional networks (GCNs) for quick assessment of the functional criticality of structural faults. We describe a computationally efficient methodology for data sampling and feature engineering to train the GCN-based framework. The proposed framework achieves up to 90% classification accuracy with negligible misclassification of critical faults.},
author = {Chaudhuri, Arjun and Talukdar, Jonti and Jung, Jinwook and Nam, Gi Joon and Chakrabarty, Krishnendu},
booktitle = {Proceedings - Design, Automation and Test in Europe, DATE},
doi = {10.23919/DATE51398.2021.9474128},
file = {:C\:/Users/fffas/Documents/Readspace/paper/Fault-Criticality_Assessment_for_AI_Accelerators_using_Graph_Convolutional_Networks.pdf:pdf},
isbn = {9783981926354},
issn = {15301591},
mendeley-groups = {autosurvey},
pages = {1596--1599},
title = {Fault-Criticality Assessment for {AI} Accelerators using Graph Convolutional Networks},
year = {2021}
}
@inproceedings{FIdelity,
abstract = {We present a resilience analysis framework, called FIdelity, to accurately and quickly analyze the behavior of hardware errors in deep learning accelerators. Our framework enables resilience analysis starting from the very beginning of the design process to ensure that the reliability requirements are met, so that these accelerators can be safely deployed for a wide range of applications, including safety-critical applications such as self-driving cars. Existing resilience analysis techniques suffer from the following limitations: 1. general-purpose hardware techniques can achieve accurate results, but they require access to RTL to perform timeconsuming RTL simulations, which is not feasible for early design exploration; 2. general-purpose software techniques can produce results quickly, but they are highly inaccurate; 3. techniques targeting deep learning accelerators only focus on memory errors. Our FIdelity framework overcomes these limitations. FIdelity only requires a minimal amount of high-level design information that can be obtained from architectural descriptions/block diagrams, or estimated and varied for sensitivity analysis. By leveraging unique architectural properties of deep learning accelerators, we are able to systematically model a major class of hardware errors - transient errors in logic components - in software with high fidelity. Therefore, FIdelity is both quick and accurate, and does not require access to RTL. We thoroughly validate our FIdelity framework using Nvidia's open-source accelerator called NVDLA, which shows that the results are highly accurate - out of 60K fault injection experiments, the software fault models derived using FIdelity closely match the behaviors observed from RTL simulations. Using the validated FIdelity framework, we perform a large-scale resilience study on NVDLA, which consists of 46M fault injection experiments running various representative deep neural network applications. 
We report the key findings and architectural insights, which can be used to guide the design of future accelerators.},
author = {He, Yi and Balaprakash, Prasanna and Li, Yanjing},
booktitle = {Proceedings of the Annual International Symposium on Microarchitecture, MICRO},
doi = {10.1109/MICRO50266.2020.00033},
file = {:C\:/Users/fffas/Documents/Readspace/paper/FIdelity.pdf:pdf},
isbn = {9781728173832},
issn = {10724451},
mendeley-groups = {understand_prop,autosurvey},
pages = {270--281},
title = {{FIdelity}: Efficient Resilience Analysis Framework for Deep Learning Accelerators},
year = {2020}
}
@inproceedings{Joseph2016,
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer's inputs changes during training, as the parame- ters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phe- nomenon as internal covariate shift, and ad- dress the problem by normalizing layer inputs. Our method draws its strength from making nor- malization a part of the model architecture and performing the normalization for each training mini-batch. Batch Normalization allows us to use much higher learning rates and be less care- ful about initialization, and in some cases elim- inates the need for Dropout. Applied to a state- of-the-art image classification model, Batch Nor- malization achieves the same accuracy with 14 times fewer training steps, and beats the original model by a significant margin. Using an ensem- ble of batch-normalized networks, we improve upon the best published result on ImageNet clas- sification: reaching 4.82% top-5 test error, ex- ceeding the accuracy of human raters.},
author = {Ioffe, Sergey and Szegedy, Christian},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
file = {:C\:/Users/fffas/Documents/Readspace/paper/BatchNorm.pdf:pdf},
internal-note = {Metadata corrected: the entry previously paired the Batch Normalization title/abstract with author, journal, DOI, and keywords from an unrelated Journalism Practice article (Joseph, 2016). Citation key kept unchanged for backward compatibility; verify pages/volume against the PMLR v37 record.},
mendeley-groups = {autosurvey},
pages = {448--456},
series = {Proceedings of Machine Learning Research},
title = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
volume = {37},
year = {2015}
}
@inproceedings{Dong2021,
abstract = {Automatic algorithm-hardware co-design for DNN has shown great success in improving the performance of DNNs on FPGAs. However, this process remains challenging due to the intractable search space of neural network architectures and hardware accelerator implementation. Differing from existing hardware-aware neural architecture search (NAS) algorithms that rely solely on the expensive learning-based approaches, our work incorporates integer programming into the search algorithm to prune the design space. Given a set of hardware resource constraints, our integer programming formulation directly outputs the optimal accelerator configuration for mapping a DNN subgraph that minimizes latency. We use an accuracy predictor for different DNN subgraphs with different quantization schemes and generate accuracy-latency pareto frontiers. With low computational cost, our algorithm can generate quantized networks that achieve state-of-the-art accuracy and hardware performance on Xilinx Zynq (ZU3EG) FPGA for image classification on ImageNet dataset. The solution searched by our algorithm achieves 72.5% top-1 accuracy on ImageNet at framerate 50, which is 60% faster than MnasNet [37] and 135% faster than FBNet [43] with comparable accuracy.},
archivePrefix = {arXiv},
arxivId = {2104.12766},
author = {Dong, Zhen and Gao, Yizhao and Huang, Qijing and Wawrzynek, John and So, Hayden K.H. and Keutzer, Kurt},
booktitle = {Proceedings - 29th IEEE International Symposium on Field-Programmable Custom Computing Machines, FCCM 2021},
doi = {10.1109/FCCM51124.2021.00014},
eprint = {2104.12766},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/HAO_Hardware-aware_Neural_Architecture_Optimization_for_Efficient_Inference.pdf:pdf},
isbn = {9780738126739},
keywords = {Efficient Deep Learning,HW SW Codesign,Image Classification,Neural Architecture Optimization,Neural Architecture Search,Quantization},
mendeley-groups = {autosurvey},
pages = {50--59},
title = {{HAO}: Hardware-aware Neural Architecture Optimization for Efficient Inference},
year = {2021}
}
@inproceedings{Yu2022,
abstract = {Pruning is an effective method to reduce the memory footprint and FLOPs associated with neural network models. However, existing structured-pruning methods often result in significant accuracy degradation for moderate pruning levels. To address this problem, we introduce a new Hessian Aware Pruning (HAP) method coupled with a Neural Implant approach that uses second-order sensitivity as a metric for structured pruning. The basic idea is to prune insensitive components and to use a Neural Implant for moderately sensitive components, instead of completely pruning them. For the latter approach, the moderately sensitive components are replaced with a low rank implant that is smaller and less computationally expensive than the original component. We use the relative Hessian trace to measure sensitivity, as opposed to the magnitude based sensitivity metric commonly used in the literature. We test HAP for both computer vision tasks and natural language tasks, and we achieve new state-of-the-art results. Specifically, HAP achieves less than 0.1%/0.5% degradation on PreResNet29/ResNet50 (CIFAR-10/ImageNet) with more than 70%/50% of parameters pruned. Meanwhile, HAP also achieves significantly better performance (up to 0.8% with 60% of parameters pruned) as compared to gradient based method for head pruning on transformer-based models. The framework has been open sourced and available online: https://github.com/yaozhewei/HAP.},
archivePrefix = {arXiv},
arxivId = {2101.08940},
author = {Yu, Shixing and Yao, Zhewei and Gholami, Amir and Dong, Zhen and Kim, Sehoon and Mahoney, Michael W. and Keutzer, Kurt},
booktitle = {Proceedings - 2022 IEEE/CVF Winter Conference on Applications of Computer Vision, WACV 2022},
doi = {10.1109/WACV51458.2022.00372},
eprint = {2101.08940},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/HAP-Hessian-Aware Pruning and Optimal Neural Implant.pdf:pdf},
isbn = {9781665409155},
keywords = {Deep Learning,Efficient Training and Inference Methods for Networks},
mendeley-groups = {autosurvey},
pages = {3665--3676},
title = {{Hessian-Aware} Pruning and Optimal Neural Implant},
year = {2022}
}
@inproceedings{Zheng2021,
abstract = {With the emergence of big data and remarkable improvement of computational power, deep neural network (DNN) based intelligent systems, with the superb performance on computer vision, nature language processing, and optimization processing, etc, has been acceleratingly replacing traditional software in various aspects. However, due to the uncertainty of DNN modules learned from data, the intelligent systems are more likely to exhibit incorrect behaviors. Faults in software and hardware are also inevitably in practice, where the hidden defects can easily cause model failure. These will lead to severe accidents and losses in safety-and reliability-critical scenarios, such as autonomous driving. Techniques to test the differences between actual and desired behaviors and evaluate the reliability of DNN applications at faulty conditions is therefore significant for building a trustworthy DNN system. A popular method is fault injection and various fault injection tools have been developed for ML frameworks, such as Tensorflow, PyTorch. In this paper, we present a tool, MindFI, which targets to cover a variety of faults in ML programs written in Mindspore. Data, software and hardware faults can be easily injected in general Mindspore programs. We also use MindFI to evaluate the resilience of several commonly used ML programs against a assessment metrics.},
author = {Zheng, Yang and Feng, Zhenye and Hu, Zheng and Pei, Ke},
booktitle = {Proceedings - 2021 IEEE International Symposium on Software Reliability Engineering Workshops, ISSREW 2021},
doi = {10.1109/ISSREW53611.2021.00068},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/MindFI_A_Fault_Injection_Tool_for_Reliability_Assessment_of_MindSpore_Applicacions.pdf:pdf},
internal-note = {NOTE(review): "Applicacions" looks like a typo for "Applications", but it also appears in the PDF filename — verify against the published title before correcting.},
isbn = {9781665426039},
keywords = {fault injection,neural network,reliability testing},
mendeley-groups = {autosurvey},
pages = {235--238},
publisher = {IEEE},
title = {{MindFI}: A Fault Injection Tool for Reliability Assessment of {MindSpore} Applicacions},
year = {2021}
}
@inproceedings{Li2018,
abstract = {As technology scales to lower feature sizes, devices become more susceptible to soft errors. Soft errors can lead to silent data corruptions (SDCs), seriously compromising the reliability of a system. Traditional hardware-only techniques to avoid SDCs are energy hungry, and hence not suitable for commodity systems. Researchers have proposed selective software-based protection techniques to tolerate hardware faults at lower costs. However, these techniques either use expensive fault injection or inaccurate analytical models to determine which parts of a program must be protected for preventing SDCs. In this work, we construct a three-level model, TRIDENT, that captures error propagation at the static data dependency, control-flow and memory levels, based on empirical observations of error propagations in programs. TRIDENT is implemented as a compiler module, and it can predict both the overall SDC probability of a given program and the SDC probabilities of individual instructions, without fault injection. We find that TRIDENT is nearly as accurate as fault injection and it is much faster and more scalable. We also demonstrate the use of TRIDENT to guide selective instruction duplication to efficiently mitigate SDCs under a given performance overhead bound.},
author = {Li, Guanpeng and Pattabiraman, Karthik and Hari, Siva Kumar Sastry and Sullivan, Michael and Tsai, Timothy},
booktitle = {Proceedings - 48th Annual IEEE/IFIP International Conference on Dependable Systems and Networks, DSN 2018},
doi = {10.1109/DSN.2018.00016},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/Modeling_softerror_porp_in_programs.pdf:pdf},
isbn = {9781538655955},
keywords = {Error Propagation,Error Resilience,Program Analysis,Silent Data Corruption,Soft Error},
mendeley-groups = {autosurvey},
pages = {27--38},
title = {Modeling {Soft-Error} Propagation in Programs},
year = {2018}
}
@inproceedings{Savarese2022,
abstract = {We study the problem of training deep networks while quantizing parameters and activations into low-precision numeric representations, a setting central to reducing energy consumption and inference time of deployed models. We propose a method that learns different precisions, as measured by bits in numeric representations, for different weights in a neural network, yielding a heterogeneous allocation of bits across parameters. Learning precisions occurs alongside learning weight values, using a strategy derived from a novel framework wherein the intractability of optimizing discrete precisions is approximated by training per-parameter noise magnitudes. We broaden this framework to also encompass learning precisions for hidden state activations, simultaneously with weight precisions and values. Our approach exposes the objective of constructing a low-precision inference-efficient model to the entirety of the training process. Experiments show that it finds highly heterogeneous precision assignments for CNNs trained on CIFAR and ImageNet, improving upon previous state-of-the-art quantization methods. Our improvements extend to the challenging scenario of learning reduced-precision GANs.},
author = {Savarese, Pedro and Yuan, Xin and Li, Yanjing and Maire, Michael},
booktitle = {Advances in Neural Information Processing Systems},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/Not All Bits have Equal Value Heterogeneous Preci.pdf:pdf},
isbn = {9781713871088},
issn = {10495258},
mendeley-groups = {autosurvey},
title = {Not All Bits have Equal Value: Heterogeneous Precisions via Trainable Noise},
volume = {35},
year = {2022}
}
@article{Izhikevich1988,
abstract = {A model is presented that reproduces spiking and bursting behavior of known types of cortical neurons. The model combines the bi- ologically plausibility of Hodgkin–Huxley-type dynamics and the compu- tational efficiency of integrate-and-fire neurons. Using this model, one can simulate tens of thousands of spiking cortical neurons in real time (1 ms resolution) using a desktop PC.},
author = {Izhikevich, Eugene M.},
doi = {10.1109/TNN.2003.820440},
file = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/Simple model of spiking neurons.pdf:pdf},
internal-note = {Metadata corrected: "Simple Model of Spiking Neurons" appeared in IEEE Transactions on Neural Networks, vol. 14, no. 6, 2003, not LNCS 1988; the previous DOI/ISBN/year belonged to another record. Citation key (Izhikevich1988) kept unchanged for backward compatibility.},
issn = {10459227},
journal = {IEEE Transactions on Neural Networks},
mendeley-groups = {autosurvey},
number = {6},
pages = {1569--1572},
title = {Simple Model of Spiking Neurons},
volume = {14},
year = {2003}
}
@inproceedings{TensorFI,
abstract = {As machine learning (ML) has seen increasing adoption in safety-critical domains (e.g., autonomous vehicles), the reliability of ML systems has also grown in importance. While prior studies have proposed techniques to enable efficient error-resilience (e.g., selective instruction duplication), a fundamental requirement for realizing these techniques is a detailed understanding of the application's resilience. In this work, we present TensorFI, a high-level fault injection (FI) framework for TensorFlow-based applications. TensorFI is able to inject both hardware and software faults in general TensorFlow programs. TensorFI is a configurable FI tool that is flexible, easy to use, and portable. It can be integrated into existing TensorFlow programs to assess their resilience for different fault types (e.g., faults in particular operators). We use TensorFI to evaluate the resilience of 12 ML programs, including DNNs used in the autonomous vehicle domain. The results give us insights into why some of the models are more resilient. We also present two case studies to demonstrate the usefulness of the tool. TensorFI is publicly available at https://github.com/DependableSystemsLab/TensorFI.},
archivePrefix = {arXiv},
arxivId = {2004.01743},
author = {Chen, Zitao and Narayanan, Niranjhana and Fang, Bo and Li, Guanpeng and Pattabiraman, Karthik and DeBardeleben, Nathan},
booktitle = {Proceedings - International Symposium on Software Reliability Engineering, ISSRE},
doi = {10.1109/ISSRE5003.2020.00047},
eprint = {2004.01743},
file = {:C\:/Users/fffas/Documents/Readspace/paper/issre20-tensorfi.pdf:pdf},
isbn = {9781728198705},
issn = {10719458},
keywords = {Fault Injection,Machine Learning,Resilience},
mendeley-groups = {understand_prop,autosurvey},
pages = {426--435},
title = {{TensorFI}: A Flexible Fault Injection Framework for {TensorFlow} Applications},
year = {2020}
}
@article{SNR2021,
abstract = {As deep learning algorithms are widely adopted, an increasing number of them are positioned in embedded application domains with strict reliability constraints. The expenditure of significant resources to satisfy performance requirements in deep neural network accelerators has thinned out the margins for delivering safety in embedded deep learning applications, thus precluding the adoption of conventional fault tolerance methods. The potential of exploiting the inherent resilience characteristics of deep neural networks remains though unexplored, offering a promising low-cost path towards safety in embedded deep learning applications. This work demonstrates the possibility of such exploitation by juxtaposing the reduction of the vulnerability surface through the proper design of the quantization schemes with shaping the parameter distributions at each layer through the guidance offered by appropriate training methods, thus delivering deep neural networks of high resilience merely through algorithmic modifications. Unequaled error resilience characteristics can be thus injected into safety-critical deep learning applications to tolerate bit error rates of up to at absolutely zero hardware, energy, and performance costs while improving the error-free model accuracy even further.},
address = {New York, NY, USA},
author = {Ozen, Elbruz and Orailoglu, Alex},
doi = {10.1145/3477007},
file = {:C\:/Users/fffas/Documents/Readspace/paper/SNR2021TECS.pdf:pdf},
internal-note = {NOTE(review): abstract reads "bit error rates of up to at absolutely zero" — a numeric value appears lost in extraction; verify against the published abstract. Fields otherwise look consistent for ACM TECS vol. 20, no. 5s.},
issn = {1539-9087},
journal = {ACM Trans. Embed. Comput. Syst.},
keywords = {Error resilience,neural network quantization,neural network regularization},
mendeley-groups = {understand_prop,autosurvey},
number = {5s},
publisher = {Association for Computing Machinery},
title = {{SNR: Squeezing Numerical Range Defuses Bit Error Vulnerability Surface in Deep Neural Networks}},
url = {https://doi.org/10.1145/3477007},
volume = {20},
year = {2021}
}
@article{Huang2023,
  abstract        = {Soft errors in large VLSI circuits have a significant impact on computing- and memory-intensive neural network (NN) processing. Understanding the influence of soft errors on NNs is critical to protect against soft errors for reliable NN processing. Prior work mainly relies on fault simulation to analyze the influence of soft errors on NN processing. They are accurate but usually specific to limited configurations of errors and NN models due to the prohibitively slow simulation speed especially for large NN models and datasets. With the observation that the influence of soft errors propagates across a large number of neurons and accumulates as well, we propose to characterize the soft error-induced data disturbance on each neuron with a normal distribution model using the central limit theorem and develop a series of statistical models to analyze the behavior of NN models under soft errors in general. The statistical models reveal not only the correlation between soft errors and the accuracy of NN models but also how NN parameters, such as quantization and architecture affect the reliability of NNs. The proposed models are compared with fault simulations and verified comprehensively. In addition, we observe that the statistical models that characterize the soft error influence can also be utilized to predict fault simulation results in many cases and we explore the use of the proposed statistical models to accelerate fault simulations of NNs. Our experiments show that the proposed accelerated fault simulation provides almost two orders of magnitude speedup with negligible loss of simulation accuracy compared to the baseline fault simulations.},
  author          = {Huang, Haitong and Xue, Xinghua and Liu, Cheng and Wang, Ying and Luo, Tao and Cheng, Long and Li, Huawei and Li, Xiaowei},
  doi             = {10.1109/TCAD.2023.3266405},
  file            = {:C\:/Users/fffas/Documents/Workspace/LLM/gptsurvey/refpdf/Statistical_Modeling_of_Soft_Error_Influence_on_Neural_Networks.pdf:pdf},
  issn            = {19374151},
  journal         = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  keywords        = {Fault analysis,fault simulation,neural network (NN) reliability,statistical fault modeling},
  mendeley-groups = {autosurvey},
  number          = {11},
  pages           = {4152--4163},
  publisher       = {IEEE},
  title           = {{Statistical Modeling of Soft Error Influence on Neural Networks}},
  volume          = {42},
  year            = {2023}
}
@inproceedings{Ares2018,
abstract = {As the use of deep neural networks continues to grow, so does the fraction of compute cycles devoted to their execution. This has led the CAD and architecture communities to devote considerable attention to building DNN hardware. Despite these efforts, the fault tolerance of DNNs has generally been overlooked. This paper is the first to conduct a large-scale, empirical study of DNN resilience. Motivated by the inherent algorithmic resilience of DNNs, we are interested in understanding the relationship between fault rate and model accuracy. To do so, we present Ares: A light-weight, DNN-specific fault injection framework validated within 12% of real hardware. We find that DNN fault tolerance varies by orders of magnitude with respect to model, layer type, and structure.},
author = {Reagen, Brandon and Gupta, Udit and Pentecost, Lillian and Whatmough, Paul and Lee, Sae Kyu and Mulholland, Niamh and Brooks, David and Wei, Gu Yeon},
booktitle = {Proceedings - Design Automation Conference},
doi = {10.1145/3195970.3195997},
file = {:C\:/Users/fffas/Documents/Readspace/paper/Ares.pdf:pdf},
internal-note = {NOTE(review): pages field is missing (auto-export gap) and volume "Part F1377" looks like a publisher series code rather than a real volume — verify both against the DAC 2018 proceedings record.},
issn = {0738100X},
mendeley-groups = {understand_prop,autosurvey},
title = {{Ares: A framework for quantifying the resilience of deep neural networks}},
volume = {Part F1377},
year = {2018}
}
@misc{Mahmoud2020,
abstract = {As Convolutional Neural Networks (CNNs) are increasingly being employed in safety-critical applications, it is important that they behave reliably in the face of hardware errors. Transient hardware errors may percolate undesirable state during execution, resulting in software-manifested errors which can adversely affect high-level decision making. This paper presents HarDNN, a software-directed approach to identify vulnerable computations during a CNN inference and selectively protect them based on their propensity towards corrupting the inference output in the presence of a hardware error. We show that HarDNN can accurately estimate relative vulnerability of a feature map (fmap) in CNNs using a statistical error injection campaign, and explore heuristics for fast vulnerability assessment. Based on these results, we analyze the tradeoff between error coverage and computational overhead that the system designers can use to employ selective protection. Results show that the improvement in resilience for the added computation is superlinear with HarDNN. For example, HarDNN improves SqueezeNet's resilience by 10x with just 30% additional computations.},
archivePrefix = {arXiv},
arxivId = {2002.09786},
author = {Mahmoud, Abdulrahman and Hari, Siva Kumar Sastry and Fletcher, Christopher W. and Adve, Sarita V. and Sakr, Charbel and Shanbhag, Naresh and Molchanov, Pavlo and Sullivan, Michael B. and Tsai, Timothy and Keckler, Stephen W.},
eprint = {2002.09786},
file = {:C\:/Users/fffas/Documents/Readspace/paper/Hardnn_Featuremapvulnerabilityevaluationincnns.pdf:pdf},
howpublished = {arXiv preprint},
mendeley-groups = {ReliableNN,autosurvey},
title = {{HarDNN}: Feature Map Vulnerability Evaluation in {CNNs}},
url = {http://arxiv.org/abs/2002.09786},
year = {2020}
}
@inproceedings{Baier2017,
abstract = {Monte Carlo Tree Search (MCTS) has been found to be a weaker player than minimax in some tactical domains, partly due to its highly selective focus only on the most promising moves. In order to combine the strategic strength of MCTS and the tactical strength of minimax, MCTS-minimax hybrids have been introduced in prior work, embedding shallow minimax searches into the MCTS framework. This paper continues this line of research by integrating MCTS and minimax even more tightly into one rollout-based hybrid search algorithm, MCTS-$\alpha$$\beta$. The hybrid is able to execute two types of rollouts: MCTS rollouts and alpha-beta rollouts, i.e. rollouts implementing minimax with alpha-beta pruning and iterative deepening. During the search, all nodes accu-mulate both MCTS value estimates as well as alpha-beta value bounds. The two types of information are combined in a given tree node when-ever alpha-beta completes a deepening iteration rooted in that node—by increasing the MCTS value estimates for the best move found by alpha-beta. A single parameter, the probability of executing MCTS rollouts vs. alpha-beta rollouts, makes it possible for the hybrid to subsume both MCTS as well as alpha-beta search as extreme cases, while allowing for a spectrum of new search algorithms in between. Preliminary results in the game of Breakthrough show the proposed hybrid to outperform its special cases of alpha-beta and MCTS. These results are promising for the further development of rollout-based algo-rithms that unify MCTS and minimax approaches.},
author = {Baier, Hendrik},
booktitle = {Computer Games},
doi = {10.1007/978-3-319-57969-6_5},
file = {:C\:/Users/fffas/Documents/Readspace/paper/Baier2017_Chapter_ARollout-BasedSearchAlgorithmU.pdf:pdf},
internal-note = {NOTE(review): CCIS vol. 705 is a Springer book series (workshop proceedings volume), not a journal, so this is entered as @inproceedings with series/volume; verify the exact booktitle against the CCIS 705 front matter.},
isbn = {9783319579689},
issn = {18650929},
mendeley-groups = {compgame,autosurvey},
pages = {57--70},
series = {Communications in Computer and Information Science},
title = {A Rollout-Based Search Algorithm Unifying {MCTS} and Alpha-Beta},
volume = {705},
year = {2017}
}