-
Notifications
You must be signed in to change notification settings - Fork 3
/
DetectorActivity.java
352 lines (299 loc) · 12.5 KB
/
DetectorActivity.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/*
* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.tensorflow.demo;
import android.graphics.Bitmap;
import android.graphics.Bitmap.Config;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import android.graphics.Paint.Style;
import android.graphics.RectF;
import android.graphics.Typeface;
import android.media.ImageReader.OnImageAvailableListener;
import android.os.SystemClock;
import android.util.Size;
import android.util.TypedValue;
import android.view.Display;
import android.widget.Toast;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Vector;
import org.tensorflow.demo.OverlayView.DrawCallback;
import org.tensorflow.demo.env.BorderedText;
import org.tensorflow.demo.env.ImageUtils;
import org.tensorflow.demo.env.Logger;
import org.tensorflow.demo.tracking.MultiBoxTracker;
import org.tensorflow.demo.R; // Explicit import needed for internal Google builds.
/**
 * An activity that runs a TensorFlow object detector on camera preview frames and feeds the
 * detections into a {@link MultiBoxTracker} so that they can be tracked and drawn between
 * detector runs.
 *
 * <p>The detector implementation (TensorFlow Object Detection API, legacy MultiBox or YOLO) is
 * selected at compile time by {@code MODE}.
 */
public class DetectorActivity extends CameraActivity implements OnImageAvailableListener {
  private static final Logger LOGGER = new Logger();

  // Configuration values for the prepackaged multibox model.
  private static final int MB_INPUT_SIZE = 224;
  private static final int MB_IMAGE_MEAN = 128;
  private static final float MB_IMAGE_STD = 128;
  private static final String MB_INPUT_NAME = "ResizeBilinear";
  private static final String MB_OUTPUT_LOCATIONS_NAME = "output_locations/Reshape";
  private static final String MB_OUTPUT_SCORES_NAME = "output_scores/Reshape";
  private static final String MB_MODEL_FILE = "file:///android_asset/multibox_model.pb";
  private static final String MB_LOCATION_FILE =
      "file:///android_asset/multibox_location_priors.txt";

  // Configuration values for the TensorFlow Object Detection API model.
  private static final int TF_OD_API_INPUT_SIZE = 300;
  private static final String TF_OD_API_MODEL_FILE = "file:///android_asset/INCMODEL.pb";
  private static final String TF_OD_API_LABELS_FILE =
      "file:///android_asset/object-detection.pbtxt";

  // Configuration values for tiny-yolo-voc. Note that the graph is not included with TensorFlow
  // and must be manually placed in the assets/ directory by the user.
  // Graphs and models downloaded from http://pjreddie.com/darknet/yolo/ may be converted e.g. via
  // DarkFlow (https://github.com/thtrieu/darkflow). Sample command:
  // ./flow --model cfg/tiny-yolo-voc.cfg --load bin/tiny-yolo-voc.weights --savepb --verbalise
  private static final String YOLO_MODEL_FILE = "file:///android_asset/graph-tiny-yolo-voc.pb";
  private static final int YOLO_INPUT_SIZE = 416;
  private static final String YOLO_INPUT_NAME = "input";
  private static final String YOLO_OUTPUT_NAMES = "output";
  private static final int YOLO_BLOCK_SIZE = 32;

  // Which detection model to use: by default uses Tensorflow Object Detection API frozen
  // checkpoints. Optionally use legacy Multibox (trained using an older version of the API)
  // or YOLO.
  private enum DetectorMode {
    TF_OD_API, MULTIBOX, YOLO;
  }

  private static final DetectorMode MODE = DetectorMode.TF_OD_API;

  // Minimum detection confidence to track a detection.
  private static final float MINIMUM_CONFIDENCE_TF_OD_API = 0.6f;
  private static final float MINIMUM_CONFIDENCE_MULTIBOX = 0.1f;
  private static final float MINIMUM_CONFIDENCE_YOLO = 0.25f;

  private static final boolean MAINTAIN_ASPECT = MODE == DetectorMode.YOLO;

  private static final Size DESIRED_PREVIEW_SIZE = new Size(640, 480);

  private static final boolean SAVE_PREVIEW_BITMAP = false;
  private static final float TEXT_SIZE_DIP = 10;

  // Rotation, in degrees within [0, 360), applied when mapping preview frames to detector input.
  private Integer sensorOrientation;

  // Stays null when detector initialization failed; every use must tolerate that.
  private Classifier detector;

  private long lastProcessingTimeMs;
  private Bitmap rgbFrameBitmap = null;
  private Bitmap croppedBitmap = null;
  private Bitmap cropCopyBitmap = null;

  // True while a detection task posted by processImage() has not finished yet.
  private boolean computingDetection = false;

  private long timestamp = 0;

  private Matrix frameToCropTransform;
  private Matrix cropToFrameTransform;

  private MultiBoxTracker tracker;

  private byte[] luminanceCopy;

  private BorderedText borderedText;

  OverlayView trackingOverlay;

  /**
   * Creates the detector and tracker, allocates the frame buffers and transforms for the chosen
   * preview size, and registers the overlay drawing callbacks.
   *
   * @param size the camera preview size that was selected
   * @param rotation the camera sensor orientation (logged below as "Sensor orientation");
   *     presumably in degrees - TODO confirm against CameraActivity
   */
  @Override
  public void onPreviewSizeChosen(final Size size, final int rotation) {
    final float textSizePx =
        TypedValue.applyDimension(
            TypedValue.COMPLEX_UNIT_DIP, TEXT_SIZE_DIP, getResources().getDisplayMetrics());
    borderedText = new BorderedText(textSizePx);
    borderedText.setTypeface(Typeface.MONOSPACE);

    tracker = new MultiBoxTracker(this);

    final int cropSize = inputSizeForMode();
    try {
      detector = createDetector();
    } catch (final IOException e) {
      LOGGER.e("Exception initializing classifier!", e);
      Toast toast =
          Toast.makeText(
              getApplicationContext(), "Classifier could not be initialized", Toast.LENGTH_SHORT);
      toast.show();
      finish();
      // Setup deliberately continues so the remaining state is initialized as before;
      // processImage() and onSetDebug() check for the missing detector.
    }

    previewWidth = size.getWidth();
    previewHeight = size.getHeight();

    // Display.getRotation() yields a Surface.ROTATION_* constant (0..3), not degrees, so it has
    // to be converted before it is combined with the sensor orientation.
    final int screenOrientation = displayRotationInDegrees();
    LOGGER.i("Sensor orientation: %d, Screen orientation: %d", rotation, screenOrientation);
    // Rotation of the camera image relative to the current display orientation, kept in [0, 360).
    sensorOrientation = ((rotation - screenOrientation) % 360 + 360) % 360;

    LOGGER.i("Initializing at size %dx%d", previewWidth, previewHeight);
    rgbFrameBitmap = Bitmap.createBitmap(previewWidth, previewHeight, Config.ARGB_8888);
    croppedBitmap = Bitmap.createBitmap(cropSize, cropSize, Config.ARGB_8888);

    frameToCropTransform =
        ImageUtils.getTransformationMatrix(
            previewWidth, previewHeight,
            cropSize, cropSize,
            sensorOrientation, MAINTAIN_ASPECT);

    // The inverse maps detector output boxes back into preview-frame coordinates.
    cropToFrameTransform = new Matrix();
    frameToCropTransform.invert(cropToFrameTransform);

    trackingOverlay = (OverlayView) findViewById(R.id.tracking_overlay);
    trackingOverlay.addCallback(
        new DrawCallback() {
          @Override
          public void drawCallback(final Canvas canvas) {
            tracker.draw(canvas);
            if (isDebug()) {
              tracker.drawDebug(canvas);
            }
          }
        });

    addCallback(
        new DrawCallback() {
          @Override
          public void drawCallback(final Canvas canvas) {
            drawDebugOverlay(canvas);
          }
        });
  }

  /** Returns the square input edge length, in pixels, of the model selected by {@code MODE}. */
  private static int inputSizeForMode() {
    switch (MODE) {
      case YOLO:
        return YOLO_INPUT_SIZE;
      case MULTIBOX:
        return MB_INPUT_SIZE;
      case TF_OD_API:
      default:
        return TF_OD_API_INPUT_SIZE;
    }
  }

  /** Returns the minimum confidence a detection needs to be tracked for the current mode. */
  private static float minimumConfidenceForMode() {
    switch (MODE) {
      case MULTIBOX:
        return MINIMUM_CONFIDENCE_MULTIBOX;
      case YOLO:
        return MINIMUM_CONFIDENCE_YOLO;
      case TF_OD_API:
      default:
        return MINIMUM_CONFIDENCE_TF_OD_API;
    }
  }

  /**
   * Builds the detector selected by {@code MODE} from the bundled assets.
   *
   * @return the newly created detector
   * @throws IOException if the TensorFlow Object Detection API model cannot be created
   */
  private Classifier createDetector() throws IOException {
    if (MODE == DetectorMode.YOLO) {
      return TensorFlowYoloDetector.create(
          getAssets(),
          YOLO_MODEL_FILE,
          YOLO_INPUT_SIZE,
          YOLO_INPUT_NAME,
          YOLO_OUTPUT_NAMES,
          YOLO_BLOCK_SIZE);
    }
    if (MODE == DetectorMode.MULTIBOX) {
      return TensorFlowMultiBoxDetector.create(
          getAssets(),
          MB_MODEL_FILE,
          MB_LOCATION_FILE,
          MB_IMAGE_MEAN,
          MB_IMAGE_STD,
          MB_INPUT_NAME,
          MB_OUTPUT_LOCATIONS_NAME,
          MB_OUTPUT_SCORES_NAME);
    }
    return TensorFlowObjectDetectionAPIModel.create(
        getAssets(), TF_OD_API_MODEL_FILE, TF_OD_API_LABELS_FILE, TF_OD_API_INPUT_SIZE);
  }

  /** Returns the current rotation of the default display converted to degrees. */
  private int displayRotationInDegrees() {
    final Display display = getWindowManager().getDefaultDisplay();
    switch (display.getRotation()) {
      case android.view.Surface.ROTATION_90:
        return 90;
      case android.view.Surface.ROTATION_180:
        return 180;
      case android.view.Surface.ROTATION_270:
        return 270;
      case android.view.Surface.ROTATION_0:
      default:
        return 0;
    }
  }

  /**
   * Draws the debug view: the most recent detector input, scaled 2x into the bottom-right corner,
   * plus detector statistics and frame geometry as text. Does nothing unless debug mode is on and
   * at least one detection has completed.
   */
  private void drawDebugOverlay(final Canvas canvas) {
    if (!isDebug()) {
      return;
    }
    // Read the field once: the detection task replaces it after every run.
    final Bitmap copy = cropCopyBitmap;
    if (copy == null) {
      return;
    }

    final int backgroundColor = Color.argb(100, 0, 0, 0);
    canvas.drawColor(backgroundColor);

    final Matrix matrix = new Matrix();
    final float scaleFactor = 2;
    matrix.postScale(scaleFactor, scaleFactor);
    matrix.postTranslate(
        canvas.getWidth() - copy.getWidth() * scaleFactor,
        canvas.getHeight() - copy.getHeight() * scaleFactor);
    canvas.drawBitmap(copy, matrix, new Paint());

    final Vector<String> lines = new Vector<String>();
    if (detector != null) {
      final String statString = detector.getStatString();
      final String[] statLines = statString.split("\n");
      for (final String line : statLines) {
        lines.add(line);
      }
    }
    lines.add("");

    lines.add("Frame: " + previewWidth + "x" + previewHeight);
    lines.add("Crop: " + copy.getWidth() + "x" + copy.getHeight());
    lines.add("View: " + canvas.getWidth() + "x" + canvas.getHeight());
    lines.add("Rotation: " + sensorOrientation);
    lines.add("Inference time: " + lastProcessingTimeMs + "ms");

    borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines);
  }

  /**
   * Handles one preview frame: always forwards it to the tracker, and, unless a detection is still
   * running, copies it and schedules a detector run via {@code runInBackground}.
   */
  @Override
  protected void processImage() {
    if (detector == null) {
      // Detector initialization failed and the activity is finishing; there is nothing to run,
      // so just hand the frame back.
      readyForNextImage();
      return;
    }

    ++timestamp;
    final long currTimestamp = timestamp;
    final byte[] originalLuminance = getLuminance();
    tracker.onFrame(
        previewWidth,
        previewHeight,
        getLuminanceStride(),
        sensorOrientation,
        originalLuminance,
        timestamp);
    trackingOverlay.postInvalidate();

    // No mutex needed as this method is not reentrant.
    if (computingDetection) {
      readyForNextImage();
      return;
    }
    computingDetection = true;
    LOGGER.i("Preparing image " + currTimestamp + " for detection in bg thread.");

    rgbFrameBitmap.setPixels(getRgbBytes(), 0, previewWidth, 0, 0, previewWidth, previewHeight);

    // Keep a private copy of the luminance data for the tracker. Reallocate whenever the frame
    // buffer size changed so the copy below can never overrun a stale buffer.
    if (luminanceCopy == null || luminanceCopy.length != originalLuminance.length) {
      luminanceCopy = new byte[originalLuminance.length];
    }
    System.arraycopy(originalLuminance, 0, luminanceCopy, 0, originalLuminance.length);
    readyForNextImage();

    // Render the full frame into the detector-sized bitmap.
    final Canvas canvas = new Canvas(croppedBitmap);
    canvas.drawBitmap(rgbFrameBitmap, frameToCropTransform, null);
    // For examining the actual TF input.
    if (SAVE_PREVIEW_BITMAP) {
      ImageUtils.saveBitmap(croppedBitmap);
    }

    runInBackground(
        new Runnable() {
          @Override
          public void run() {
            LOGGER.i("Running detection on image " + currTimestamp);
            final long startTime = SystemClock.uptimeMillis();
            final List<Classifier.Recognition> results = detector.recognizeImage(croppedBitmap);
            lastProcessingTimeMs = SystemClock.uptimeMillis() - startTime;

            // Copy of the detector input with accepted boxes drawn on it, for the debug overlay.
            cropCopyBitmap = Bitmap.createBitmap(croppedBitmap);
            final Canvas canvas = new Canvas(cropCopyBitmap);
            final Paint paint = new Paint();
            paint.setColor(Color.RED);
            paint.setStyle(Style.STROKE);
            paint.setStrokeWidth(2.0f);

            final float minimumConfidence = minimumConfidenceForMode();

            final List<Classifier.Recognition> mappedRecognitions =
                new LinkedList<Classifier.Recognition>();

            for (final Classifier.Recognition result : results) {
              final RectF location = result.getLocation();
              if (location != null && result.getConfidence() >= minimumConfidence) {
                // Draw in crop coordinates first, then map the box into frame coordinates.
                canvas.drawRect(location, paint);

                cropToFrameTransform.mapRect(location);
                result.setLocation(location);
                mappedRecognitions.add(result);
              }
            }

            tracker.trackResults(mappedRecognitions, luminanceCopy, currTimestamp);
            trackingOverlay.postInvalidate();

            requestRender();
            computingDetection = false;
          }
        });
  }

  /** Returns the layout resource used for the camera fragment of this activity. */
  @Override
  protected int getLayoutId() {
    return R.layout.camera_connection_fragment_tracking;
  }

  /** Returns the preview frame size this activity asks for (640x480). */
  @Override
  protected Size getDesiredPreviewFrameSize() {
    return DESIRED_PREVIEW_SIZE;
  }

  /**
   * Turns the detector's stat logging on or off. Has no effect while no detector exists.
   *
   * @param debug whether stat logging should be enabled
   */
  @Override
  public void onSetDebug(final boolean debug) {
    if (detector != null) {
      detector.enableStatLogging(debug);
    }
  }
}