1
1
#include < QJsonDocument>
2
2
#include < QThread>
3
-
3
+ # include < QMessageBox >
4
4
#include " OpenAI/openai.hpp"
5
5
#include " communicate.h"
6
6
7
7
using namespace Microsoft ::CognitiveServices::Speech;
8
8
using namespace Microsoft ::CognitiveServices::Speech::Audio;
9
9
10
10
// Builds a Mailman bound to the given chat widget.
//
// chatWidget: receiver of chat updates; stored in m_chatWidget.
//
// Every updateChatModel(json, QString) emission from this object is routed
// to the widget's update(json, QString) slot, which (per the original note)
// saves the chat history to the Qt standard model.
Mailman::Mailman(ChatWidget *chatWidget)
    : m_chatWidget(chatWidget)
{
    connect(this,
            SIGNAL(updateChatModel(nlohmann::json,QString)),
            m_chatWidget,
            SLOT(update(nlohmann::json,QString)));
}
13
14
14
15
15
16
void Mailman::talk (QString prompts, QString azureSetting) {
17
+
18
+ // Get the current prompts.
16
19
static QRegularExpression fingNewLineChar (" [\\ n]" );
17
20
std::string cleanprompt = prompts.remove (fingNewLineChar).toStdString ();
18
21
22
+ // Format the prompts following OPENAI's standard.
19
23
jsonBody = initJsonBody (cleanprompt);
20
24
25
+ // Read the variables from System Setting.
21
26
auto speechKey = GetEnvironmentVariable (" SPEECH_KEY" );
22
27
auto speechRegion = GetEnvironmentVariable (" SPEECH_REGION" );
23
28
29
+ // If the variable is not set, then quit the thread.
24
30
if ((std::size (speechKey) == 0 ) || (std::size (speechRegion) == 0 )) {
25
31
std::cout << " Please set both SPEECH_KEY and SPEECH_REGION environment variables." << std::endl;
32
+ QMessageBox msgBox;
33
+ msgBox.setWindowTitle (" Error" );
34
+ msgBox.setText (" Cannot find SPEECH_KEY and SPEECH_REGION environment variables. Check your system environment variable setting!" );
35
+ msgBox.setIcon (QMessageBox::Information);
36
+ msgBox.addButton (" OK" , QMessageBox::AcceptRole);
37
+
38
+ QThread::currentThread ()->quit ();
39
+ QThread::currentThread ()->wait ();
26
40
}
27
-
28
41
auto speechConfig = SpeechConfig::FromSubscription (speechKey, speechRegion);
29
42
43
+ // Get the preferred voice.
30
44
QString language = azureSetting.split (" -" )[0 ] + " -" + azureSetting.split (" -" )[1 ];
31
45
QString voice = azureSetting;
32
46
speechConfig->SetSpeechRecognitionLanguage (language.toStdString ());
33
47
speechConfig->SetSpeechSynthesisVoiceName (voice.toStdString ());
34
48
35
-
49
+ // Initialize streaming...
36
50
auto audioConfig = AudioConfig::FromDefaultMicrophoneInput ();
37
51
auto recognizer = SpeechRecognizer::FromConfig (speechConfig, audioConfig);
38
52
auto speechSynthesizer = SpeechSynthesizer::FromConfig (speechConfig);
39
53
40
-
54
+ // Start streaming...
41
55
while (!QThread::currentThread ()->isInterruptionRequested ())
42
56
{
43
57
auto voiceInput = recognizer->RecognizeOnceAsync ().get ();
@@ -49,21 +63,29 @@ void Mailman::talk(QString prompts, QString azureSetting) {
49
63
50
64
nlohmann::json userInputJson;
51
65
52
- // Add key-value pairs to the JSON object
66
+ // The json contains Expression and Motion to animate the Live2D model and Content for conversation.
53
67
userInputJson[" Expression" ] = " " ;
54
68
userInputJson[" Motion" ] = " " ;
55
69
userInputJson[" Content" ] = userInput;
70
+
71
+ // Save the JSON to the Qt StandardItemModel so that the chat bubble can be filled in with words.
56
72
emit updateChatModel (userInputJson, " Outgoing" );
57
73
74
+ // Format the user input following OpenAI's standard.
58
75
nlohmann::json userMessage = getNewMessage (" user" , userInput);
76
+
77
+ // Stitch it with the main json.
59
78
jsonBody = insertMessage (jsonBody, userMessage);
60
79
80
+ // Send POST request to OpenAI.
61
81
openai::start ();
62
82
nlohmann::json completion = openai::chat ().create (jsonBody);
63
83
84
+ // Get the response from OpenAI. Always use the top-1 response.
64
85
std::string gptResponse = completion[" choices" ][0 ][" message" ][" content" ];
65
- nlohmann::json gptResponseJson = parseJsonString (gptResponse);
66
86
87
+ // Parse the returned data as if it is JSON. Sometimes the data may not be JSON, so we manually make sure it is.
88
+ nlohmann::json gptResponseJson = parseJsonString (gptResponse);
67
89
expression = gptResponseJson.contains (" Expression" ) ? gptResponseJson[" Expression" ] : " " ;
68
90
motion = gptResponseJson.contains (" Motion" ) ? gptResponseJson[" Motion" ] : " " ;
69
91
content = gptResponseJson.contains (" Content" ) ? gptResponseJson[" Content" ].get <std::string>() : gptResponse;
@@ -73,16 +95,21 @@ void Mailman::talk(QString prompts, QString azureSetting) {
73
95
reconstructedJson[" Motion" ] = motion;
74
96
reconstructedJson[" Content" ] = content;
75
97
98
+ // Send the expression and motion to animate the Live2D model.
76
99
emit sendResponseMove (expression, motion);
100
+
101
+ // Save the json to Qt StandardItemModel.
77
102
emit updateChatModel (reconstructedJson, " Incoming" );
78
103
104
+ // Stitch the json with the main json.
79
105
nlohmann::json gptMessage = getNewMessage (" assistant" , reconstructedJson.dump ());
80
106
jsonBody = insertMessage (jsonBody, gptMessage);
81
107
108
+ // Voice the reply.
82
109
auto chatGptVoiceOutput = speechSynthesizer->SpeakTextAsync (content).get ();}
83
110
}
84
111
85
-
112
+ // Stop the thread.
86
113
QThread::currentThread ()->quit ();
87
114
QThread::currentThread ()->wait ();
88
115
}
@@ -99,6 +126,7 @@ nlohmann::json Mailman::getNewMessage(std::string role, std::string content)
99
126
}
100
127
101
128
void Mailman::chat (QString prompts, QString userInput) {
129
+ // This member is largely the same as talk(), but it is only a one-turn process and has no while loop.
102
130
static QRegularExpression fingNewLineChar (" [\\ n]" );
103
131
std::string cleanprompt = prompts.remove (fingNewLineChar).toStdString ();
104
132
@@ -141,6 +169,7 @@ void Mailman::chat(QString prompts, QString userInput) {
141
169
142
170
nlohmann::json Mailman::insertMessage (nlohmann::json json, nlohmann::json newJson)
143
171
{
172
+ // Stitch the reply from ChatGPT so that it can track the context. Without stitching, the prompt will be forgotten in no time.
144
173
if (newJson.size ())
145
174
{
146
175
if (newJson.is_array ())
0 commit comments