From 482226b15a757f39871038f35b3b8aad7729e594 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 21 Feb 2019 11:05:10 -0500 Subject: [PATCH] [sentiment] Revise *_CHAR to *_INDEX for clarity (#233) --- README.md | 18 +++++++++--------- sentiment/data.js | 6 +++--- sentiment/index.js | 4 ++-- sentiment/sequence_utils.js | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 4f819fa61..d3f44b880 100644 --- a/README.md +++ b/README.md @@ -98,14 +98,14 @@ to another project. date-conversion-attention - - - Attention RNN for text-to-text conversion - - - - - + 🔗 + Text + Text-to-text conversion + Attention mechanism, RNN + Node + Browser and Node + Layers + Saving to filesystem and loading in browser iris @@ -144,7 +144,7 @@ to another project. lstm-text-generation 🔗 Text - Sequence-to-prediction + Sequence prediction RNN: LSTM Browser Browser diff --git a/sentiment/data.js b/sentiment/data.js index c9a8352b3..6db1caaae 100644 --- a/sentiment/data.js +++ b/sentiment/data.js @@ -21,7 +21,7 @@ import * as https from 'https'; import * as os from 'os'; import * as path from 'path'; -import {OOV_CHAR, PAD_CHAR, padSequences} from './sequence_utils'; +import {OOV_INDEX, PAD_INDEX, padSequences} from './sequence_utils'; // `import` doesn't seem to work with extract-zip. const extract = require('extract-zip'); @@ -36,7 +36,7 @@ const METADATA_TEMPLATE_URL = * * @param {string} filePath Data file on local filesystem. * @param {string} numWords Number of words in the vocabulary. Word indices - * that exceed this limit will be marked as `OOV_CHAR`. + * that exceed this limit will be marked as `OOV_INDEX`. * @param {string} maxLen Length of each sequence. Longer sequences will be * pre-truncated; shorter ones will be pre-padded. * @return {tf.Tensor} The dataset represented as a 2D `tf.Tensor` of shape @@ -60,7 +60,7 @@ function loadFeatures(filePath, numWords, maxLen) { seq = []; } else { // Sequence continues. 
- seq.push(value >= numWords ? OOV_CHAR : value); + seq.push(value >= numWords ? OOV_INDEX : value); } index += 4; } diff --git a/sentiment/index.js b/sentiment/index.js index 430fdd70f..05a6559e1 100644 --- a/sentiment/index.js +++ b/sentiment/index.js @@ -18,7 +18,7 @@ import * as tf from '@tensorflow/tfjs'; import * as loader from './loader'; import * as ui from './ui'; -import {OOV_CHAR, padSequences} from './sequence_utils'; +import {OOV_INDEX, padSequences} from './sequence_utils'; const HOSTED_URLS = { model: @@ -65,7 +65,7 @@ class SentimentPredictor { const sequence = inputText.map(word => { let wordIndex = this.wordIndex[word] + this.indexFrom; if (wordIndex > this.vocabularySize) { - wordIndex = OOV_CHAR; + wordIndex = OOV_INDEX; } return wordIndex; }); diff --git a/sentiment/sequence_utils.js b/sentiment/sequence_utils.js index 54d6169a0..8ac41d446 100644 --- a/sentiment/sequence_utils.js +++ b/sentiment/sequence_utils.js @@ -19,8 +19,8 @@ * Utilities for sequential data. */ -export const PAD_CHAR = 0; -export const OOV_CHAR = 2; +export const PAD_INDEX = 0; // Index of the padding character. +export const OOV_INDEX = 2; // Index of the OOV character. /** * Pad and truncate all sequences to the same length @@ -34,7 +34,7 @@ export const OOV_CHAR = 2; * @param {number} value Padding value. */ export function padSequences( - sequences, maxLen, padding = 'pre', truncating = 'pre', value = PAD_CHAR) { + sequences, maxLen, padding = 'pre', truncating = 'pre', value = PAD_INDEX) { // TODO(cais): This perhaps should be refined and moved into tfjs-preproc. return sequences.map(seq => { // Perform truncation.