// main.cpp
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

#include "whisper.h"
#include "WhisperComponent.hpp"
#include "resamplingCode/CDSPResampler.h"
#include "gFunctions.hpp"
using namespace std;
int main(){
atomic<bool> canRead(false);
auto start = std::chrono::high_resolution_clock::now();
std::cout << "STARTING MAIN THREAD\n" << '\n';
std::thread whisperThread([start, &canRead]{
// whisper init
std::cout << "STARTING WHISPER THREAD\n" << '\n';
WhisperComponent wComponent;
WhisperComponent::whisper_params params;
params.model = "/Users/tonytorm/Documents/gCoding/whisp/models/ggml-model-whisper-tiny.en.bin";
whisper_context* ctx = whisper_init_from_file(params.model.c_str());
while (true){
// also check file is not being written to in this moment
ifstream infile("/Users/tonytorm/Documents/gCoding/whisp/TESTFILES/MIXER-A-T01_split1.bin", std::ios::binary | std::ios::ate);
if (infile && canRead.load()){
int size = (int)infile.tellg();
infile.seekg(0, std::ios::beg);
int sizeInFloats = size/sizeof(float);
float* data = new float[sizeInFloats];
infile.read((char*)data, size);
infile.close();
wComponent.callWhisperFullWithoutAudiofile(ctx,
params,
data,
sizeInFloats,
params.n_processors != 0);
delete [] data;
std::cout << "DONE. TIME ELAPSED - " << '\n';
printElapsedTimeSince(start);
break;
}else{
std::this_thread::sleep_for(std::chrono::seconds(5)); // check every 5 seconds
std::cout << "FILE IS NOT READY, WAITING 5 SECONDS\n" << '\n';
}
}
});
const double OutSampleRate = 16000.0;
AudioFile wavFile;
std::string partialFilePath = "/Users/tonytorm/Desktop/kraken_media/bal/";
std::string fileName = "MIXER-A-T01";
std::string filePath = partialFilePath + fileName + ".WAV";
if (readHeader(filePath, &wavFile)){
std::cout << "Wave file parsed - ";
printElapsedTimeSince(start);
ifstream inputStream;
std::vector<ofstream> outputStreams;
inputStream.open(filePath, ifstream::binary); //49152
if(!inputStream.good()){
cout << "error while trying to open buffer\n";
return -1; // error
}
inputStream.seekg((uint32_t)wavFile._datalocation);
// uint32_t dataChunkSize = (uint32_t)(wavFile._datasize/wavFile._ChannelCount) /3 * 2; // force to 16 bit depth
// uint32_t fmtChunkSize = 16;
//uint32_t totalFileSize = (8 + fmtChunkSize) + (8 + dataChunkSize);
for (int i = 0; i < wavFile._ChannelCount; i++){
std::string folderPath = "/Users/tonytorm/Documents/gCoding/whisp/TESTFILES/";
std::string str = folderPath + fileName + "_split" + std::to_string(i) + ".bin";
const char* outputFileName = str.c_str();
outputStreams.emplace_back(ofstream(outputFileName, ofstream::binary));
//ofstream& stream = outputStreams.back();
// writeRIFFChunk(stream, totalFileSize);
// writeFMTChunk(stream, OutSampleRate, 16, 1); // write a 16bit mono file for each channel
// writeDataChunkHeader(stream, dataChunkSize);
}
const int InBufCapacity = 1024;
int channelCount = wavFile._ChannelCount;
std::vector<std::unique_ptr<r8b::CDSPResampler24>> resamplers(channelCount);
int bytesPerSample = wavFile._bitdepth/8;
int frameSize = bytesPerSample*wavFile._ChannelCount; // 1 sample for each channel
const size_t INPUTBUFFERSIZE = frameSize * InBufCapacity;
std::vector<std::vector<double>> convertedPlanarBuffers; // from raw data to double
for (int i = 0; i < channelCount; i++){ // allocate a buffer for each channel (planar)
convertedPlanarBuffers.push_back(std::vector<double>(1024));
resamplers[i].reset(new r8b::CDSPResampler24(48000.0, OutSampleRate, InBufCapacity));
}
char buffer[INPUTBUFFERSIZE]; // allocate an interleaved buffer
int dataRead = 0;
std::vector<std::vector<float>> floatBuffer(channelCount);
//std::cout << "nb of required buffers: " << wavFile._datasize / INPUTBUFFERSIZE << '\n';
while (inputStream.read(buffer, INPUTBUFFERSIZE)) { // buffer speed
static int bufferCounter = 0;
bufferCounter++;
static int count = 0;
if (count ==0){ // just signal start of processing
std::cout << "Starting bit depth/sampling conversion - ";
printElapsedTimeSince(start);
count++;
}
int b = 0;
// should we zero the buffers as well?
for (int i = 0; i < INPUTBUFFERSIZE; i+=frameSize){ // frame speed
for (int j = 0; j < channelCount; j++){
char rawData[bytesPerSample];
for (int z = 0; z < bytesPerSample; z++){
rawData[z] = buffer[i + (j*bytesPerSample+z)];
}
double sample = convertByteArrayToDouble(rawData);
convertedPlanarBuffers.at(j).at(b) = sample;
}
b++; // this counter is here to index samples
}
//buffer speed
double* resampledBuffers[channelCount];
for (int j = 0; j < channelCount; j++){
auto& resampler = resamplers[j];
int writeCount = 0;
writeCount = resampler->process(convertedPlanarBuffers[j].data(), InBufCapacity, resampledBuffers[j]);
int prevSize = (int)floatBuffer[j].size();
floatBuffer[j].resize(prevSize + writeCount);
for (int i = 0; i < writeCount; i++){
double* doubleBuffer = resampledBuffers[j];
floatBuffer[j][i+prevSize] = doubleBuffer[i];
}
// if (floatBuffer.size() != 0) std::cout << "FLOAT BUFFER SIZE :" << floatBuffer.size() << '\n';
// for (int i = prevSize; i < floatBuffer.size(); i++){
// floatBuffer[i] = -1;
// }
// char outSample[2] = {0, 0};
// ConvertDoubleToByteArray(outSample, (float)resampledBuffers[j][i]);
// streamDataSampleInBytes(outputStreams[j], outSample);
}
// if (bufferCounter == 200){
// wComponent.callWhisperFullWithoutAudiofile(ctx, params, floatBuffer.data(), (int)floatBuffer.size(), params.n_processors != 0);
// bufferCounter = 0;
// floatBuffer.clear();
// }
dataRead += inputStream.gcount();
if (dataRead >= wavFile._datasize){
for (int i = 0; i < channelCount; i++){
outputStreams[i].write(reinterpret_cast<char*>(floatBuffer[i].data()), (int)floatBuffer[i].size() * sizeof(float));
outputStreams[i].close();
canRead.store(true);
}
break;
}
}
std::cout<< "Almost done, closing file streams - ";
printElapsedTimeSince(start);
std::cout<<'\n';
inputStream.close();
// for (int i = 0; i < channelCount; i++){
// // we should write data chunk size in here
// outputStreams[i].close();
// }
}
std::cout << "ALL DONE ON MAIN THREAD - ";
printElapsedTimeSince(start);
whisperThread.join();
return 0;
}
// whisper_params _params;
// // input a single 44.1k 16bit audiofile
// _params.fname_inp.push_back("/Users/tonytorm/Documents/gCoding/whisp/TESTFILES/TESTWAV1.wav");
//
// // whisper init
// _params.model = "/Users/tonytorm/Documents/gCoding/whisp/models/ggml-model-whisper-tiny.en.bin";
// whisper_context* _ctx = whisper_init_from_file(_params.model.c_str());
//
// if (_ctx == nullptr) {
// fprintf(stderr, "error: failed to initialize whisper context\n");
// return 0;
// }
// std::cout << '\n' << "Model loaded - ";
// printElapsedTimeSince(start);
// std::cout << '\n';
//
// // 1.42 secs on 4.41 minutes file
//
//
// runTranscription(_ctx, _params);