Skip to content

Commit 058a271

Browse files
author
Pietro Passarelli - News Labs
committed
fix from PR review
Fixes from James's comments on #144 (review)
1 parent 3588b26 commit 058a271

2 files changed

Lines changed: 29 additions & 40 deletions

File tree

packages/components/timed-text-editor/UpdateTimestamps/index.js

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,36 @@
1-
import generateEntitiesRanges from '../../../stt-adapters/generate-entities-ranges/index.js';
2-
import { createEntityMap } from '../../../stt-adapters/index.js';
1+
import generateEntitiesRanges from '../../../stt-adapters/generate-entities-ranges';
2+
import { createEntityMap } from '../../../stt-adapters';
33
import alignWords from './stt-align-node.js';
44

55
const convertContentToText = (content) => {
66
let text = [];
7-
for (var blockIdx in content.blocks) {
8-
const block = content.blocks[blockIdx];
7+
for (const blockIndex in content.blocks) {
8+
const block = content.blocks[blockIndex];
99
const blockArray = block.text.match(/\S+/g) || [];
1010
text = text.concat(blockArray);
1111
}
1212

13-
return (text);
13+
return text;
1414
};
1515

16-
const createEntity = (start, end, confidence, word, wordIdx) => {
16+
const createEntity = (start, end, confidence, word, wordIndex) => {
1717
return ({
1818
start: start,
1919
end: end,
2020
confidence: confidence,
2121
word: word.toLowerCase().replace(/[.?!]/g, ''),
2222
punct: word,
23-
index: wordIdx,
23+
index: wordIndex,
2424
});
2525
};
2626

2727
const createContentFromEntityList = (currentContent, newEntities) => {
2828
// Update entities to block structure.
29-
var updatedBlockArray = [];
30-
var totalWords = 0;
29+
const updatedBlockArray = [];
30+
let totalWords = 0;
3131

32-
for (var blockIdx in currentContent.blocks) {
33-
const block = currentContent.blocks[blockIdx];
32+
for (const blockIndex in currentContent.blocks) {
33+
const block = currentContent.blocks[blockIndex];
3434
// if copy and pasting large chunk of text
3535
// currentContentBlock, would not have speaker and start/end time info
3636
// so for updatedBlock, getting start time from first word in blockEntities
@@ -68,7 +68,7 @@ const updateTimestamps = (currentContent, originalContent) => {
6868

6969
const entities = [];
7070

71-
for (var entityIdx in entityMap) {
71+
for (const entityIdx in entityMap) {
7272
entities.push({
7373
start: parseFloat(entityMap[entityIdx].data.start),
7474
end: parseFloat(entityMap[entityIdx].data.end),

packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// code obtained from https://github.com/bbc/stt-align-node
22

3-
import converterNumbersToWords from 'number-to-words';
3+
import { toWords } from 'number-to-words';
44
import difflib from 'difflib';
55
import everpolate from 'everpolate';
66

@@ -24,12 +24,12 @@ function removeTrailingPunctuation(str) {
2424
* handles edge case if word is undefined, and returns undefined in that instance
2525
*/
2626
function normaliseWord(wordText) {
27-
if (wordText !== undefined) {
27+
if (wordText) {
2828
const wordTextResult = wordText.toLowerCase().trim().replace(/[^a-z|0-9|.]+/g, '');
2929
if (isANumber(wordTextResult)) {
3030
const sanitizedWord = removeTrailingPunctuation(wordTextResult);
3131
if (sanitizedWord !== '') {
32-
return converterNumbersToWords.toWords(sanitizedWord);
32+
return toWords(sanitizedWord);
3333
}
3434
}
3535

@@ -100,30 +100,28 @@ function adjustTimecodesBoundaries(words) {
100100
}
101101

102102
function interpolate(wordsList) {
103-
let words = interpolationOptimization(wordsList);
103+
const words = interpolationOptimization(wordsList);
104104
const indicies = [ ...Array(words.length).keys() ];
105105
const indiciesWithStart = [];
106106
const indiciesWithEnd = [];
107107
const startTimes = [];
108108
const endTimes = [];
109-
// interpolate times for start
110-
for (let i = 0; i < words.length; i++) {
111-
if ('start' in words[i]) {
112-
indiciesWithStart.push(i);
113-
startTimes.push(words[i].start);
109+
110+
words.forEach((word, index) => {
111+
if ('start' in word) {
112+
indiciesWithStart.push(index);
113+
startTimes.push(word.start);
114114
}
115-
}
116-
// interpolate times for end
117-
for (let i = 0; i < words.length; i++) {
118-
if ('end' in words[i]) {
119-
indiciesWithEnd.push(i);
120-
endTimes.push(words[i].end);
115+
116+
if ('end' in word) {
117+
indiciesWithEnd.push(index);
118+
endTimes.push(word.end);
121119
}
122-
}
120+
});
123121
// http://borischumichev.github.io/everpolate/#linear
124122
const outStartTimes = everpolate.linear(indicies, indiciesWithStart, startTimes);
125123
const outEndTimes = everpolate.linear(indicies, indiciesWithEnd, endTimes);
126-
words = words.map((word, index) => {
124+
const wordsResults = words.map((word, index) => {
127125
if (!('start' in word)) {
128126
word.start = outStartTimes[index];
129127
}
@@ -134,40 +132,31 @@ function interpolate(wordsList) {
134132
return word;
135133
});
136134

137-
return adjustTimecodesBoundaries(words);
135+
return adjustTimecodesBoundaries(wordsResults);
138136
}
139137

140138
/**
141139
*
142-
* @param {array} sttData - array of STT words
140+
* @param {array} sttWords - array of STT words
143141
* @param {array} transcriptWords - array of base text accurate words
144142
*/
145143
function alignWords(sttWords, transcriptWords) {
146-
// console.log(sttWords);
147-
// # extract list of words
148-
// sttWords=[words.get('word') for words in sttData]
149-
150144
// # convert words to lowercase and remove numbers and special characters
151-
// sttWordsStripped = [re.sub('[^a-z]', '', word.lower()) for word in sttWords]
152145
const sttWordsStripped = sttWords.map((word) => {
153146
return normaliseWord(word.word);
154147
});
155148

156-
// transcriptWordsStripped = [re.sub('[^a-z]', '', word.lower()) for word in transcriptWords]
157149
const transcriptWordsStripped = transcriptWords.map((word) => {
158150
return normaliseWord(word);
159151
});
160152
// # create empty list to receive data
161-
// transcriptData = [{} for _ in range(len(transcriptWords))]
162153
const transcriptData = [];
163154
// empty objects as place holder
164155
transcriptWords.forEach(() => {
165156
transcriptData.push({});
166157
});
167158
// # populate transcriptData with matching words
168-
// matcher = difflib.SequenceMatcher(None, sttWordsStripped, transcriptWordsStripped)
169159
// if they are the same length, just interpolate words?
170-
// const matcher = diffWordMode(transcriptWordsStripped, sttWordsStripped);
171160
// http://qiao.github.io/difflib.js/
172161
const matcher = new difflib.SequenceMatcher(null, sttWordsStripped, transcriptWordsStripped);
173162
const opCodes = matcher.getOpcodes();

0 commit comments

Comments
 (0)