forked from openvinotoolkit/openvino.genai
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge releases/2024/5 into master (openvinotoolkit#1174)
Co-authored-by: Ryan Metcalfe <[email protected]> Co-authored-by: yatarkan <[email protected]> Co-authored-by: Ilya Lavrenov <[email protected]> Co-authored-by: TolyaTalamanov <[email protected]> Co-authored-by: Ryan Metcalfe <[email protected]> Co-authored-by: wgzintel <[email protected]> Co-authored-by: Sergey Lyalin <[email protected]> Co-authored-by: Ekaterina Aidova <[email protected]> Co-authored-by: Chen Peter <[email protected]>
- Loading branch information
1 parent
72ce6de
commit 4017c8f
Showing
14 changed files
with
486 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
// Copyright (C) 2023-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "imwrite.hpp" | ||
#include "openvino/genai/image_generation/text2image_pipeline.hpp" | ||
|
||
int32_t main(int32_t argc, char* argv[]) try { | ||
OPENVINO_ASSERT(argc >= 3 && argc <= 6, | ||
"Usage: ", | ||
argv[0], | ||
" <MODEL_DIR> '<PROMPT>' [ <TXT_ENCODE_DEVICE> <UNET_DEVICE> <VAE_DEVICE> ]"); | ||
|
||
const std::string models_path = argv[1], prompt = argv[2]; | ||
|
||
std::filesystem::path root_dir = models_path; | ||
|
||
const int width = 512; | ||
const int height = 512; | ||
const float guidance_scale = 7.5f; | ||
const int number_of_images_to_generate = 1; | ||
const int number_of_inference_steps_per_image = 20; | ||
|
||
// Set devices to command-line args if specified, otherwise default to CPU. | ||
// Note that these can be set to CPU, GPU, or NPU. | ||
const std::string text_encoder_device = (argc > 3) ? argv[3] : "CPU"; | ||
const std::string unet_device = (argc > 4) ? argv[4] : "CPU"; | ||
const std::string vae_decoder_device = (argc > 5) ? argv[5] : "CPU"; | ||
|
||
std::cout << "text_encoder_device: " << text_encoder_device << std::endl; | ||
std::cout << "unet_device: " << unet_device << std::endl; | ||
std::cout << "vae_decoder_device: " << vae_decoder_device << std::endl; | ||
|
||
// this is the path to where compiled models will get cached | ||
// (so that the 'compile' method run much faster 2nd+ time) | ||
std::string ov_cache_dir = "./cache"; | ||
|
||
// | ||
// Step 1: Prepare each Text2Image subcomponent (scheduler, text encoder, unet, vae) separately. | ||
// | ||
|
||
// Create the scheduler from the details listed in the json. | ||
auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"); | ||
|
||
// Note that we could have created the scheduler by specifying specific type (for example EULER_DISCRETE), like | ||
// this: auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json", | ||
// ov::genai::Scheduler::Type::EULER_DISCRETE); | ||
|
||
// Create unet object | ||
auto unet = ov::genai::UNet2DConditionModel(root_dir / "unet"); | ||
|
||
// Given the guidance scale, etc., calculate the batch size. | ||
int unet_batch_size = 1; | ||
if (guidance_scale > 1.0f && unet.get_config().time_cond_proj_dim < 0) { | ||
unet_batch_size = 2; | ||
} | ||
|
||
// Create, reshape, and compile the text encoder. | ||
auto text_encoder = ov::genai::CLIPTextModel(root_dir / "text_encoder"); | ||
text_encoder.reshape(unet_batch_size); | ||
text_encoder.compile(text_encoder_device, ov::cache_dir(ov_cache_dir)); | ||
|
||
// The max_postiion_embeddings config from text encoder will be used as a parameter to unet reshape. | ||
int max_position_embeddings = text_encoder.get_config().max_position_embeddings; | ||
|
||
// Reshape unet to a static shape, and compile it. | ||
unet.reshape(unet_batch_size, height, width, max_position_embeddings); | ||
unet.compile(unet_device, ov::cache_dir(ov_cache_dir)); | ||
|
||
// Create, reshape, and compile the vae decoder. | ||
auto vae = ov::genai::AutoencoderKL(root_dir / "vae_decoder"); | ||
vae.reshape(1, height, width); // We set batch-size to '1' here, as we're configuring our pipeline to return 1 | ||
// image per 'generate' call. | ||
vae.compile(vae_decoder_device, ov::cache_dir(ov_cache_dir)); | ||
|
||
// | ||
// Step 2: Create a Text2ImagePipeline from the individual subcomponents | ||
// | ||
auto pipe = ov::genai::Text2ImagePipeline::stable_diffusion(scheduler, text_encoder, unet, vae); | ||
|
||
// | ||
// Step 3: Use the Text2ImagePipeline to generate 'number_of_images_to_generate' images. | ||
// | ||
for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) { | ||
std::cout << "Generating image " << imagei << std::endl; | ||
|
||
ov::Tensor image = pipe.generate(prompt, | ||
ov::genai::width(width), | ||
ov::genai::height(height), | ||
ov::genai::guidance_scale(guidance_scale), | ||
ov::genai::num_inference_steps(number_of_inference_steps_per_image)); | ||
|
||
imwrite("image_" + std::to_string(imagei) + ".bmp", image, true); | ||
} | ||
|
||
return EXIT_SUCCESS; | ||
} catch (const std::exception& error) { | ||
try { | ||
std::cerr << error.what() << '\n'; | ||
} catch (const std::ios_base::failure&) { | ||
} | ||
return EXIT_FAILURE; | ||
} catch (...) { | ||
try { | ||
std::cerr << "Non-exception object thrown\n"; | ||
} catch (const std::ios_base::failure&) { | ||
} | ||
return EXIT_FAILURE; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// Copyright (C) 2023-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "openvino/genai/image_generation/unet2d_condition_model.hpp" | ||
|
||
namespace ov { | ||
namespace genai { | ||
|
||
// Abstract interface for a UNet inference backend. Concrete implementations
// (selected elsewhere in the project) provide device-specific compile/infer
// behavior behind this polymorphic API.
class UNet2DConditionModel::UNetInference {

public:
    // A polymorphic base must have a virtual destructor: without it,
    // deleting a derived implementation through a UNetInference pointer
    // (e.g. via std::unique_ptr<UNetInference>) is undefined behavior.
    virtual ~UNetInference() = default;

    // Compile the given model for the target device with the given properties.
    virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) = 0;
    // Bind encoder hidden states to the named input tensor.
    virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0;
    // Apply LoRA-style adapters via the controller.
    virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0;
    // Run one denoising step; returns the model output tensor.
    virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) = 0;

    // utility function to resize model given optional dimensions.
    // Only the dimensions that are passed are pinned; absent ones keep their
    // current (possibly dynamic) extent. Recognized inputs: "timestep",
    // "sample", "time_ids", "text_embeds", "encoder_hidden_states",
    // "timestep_cond"; any other input keeps its original partial shape.
    static void reshape(std::shared_ptr<ov::Model> model,
                        std::optional<int> batch_size = {},
                        std::optional<int> height = {},
                        std::optional<int> width = {},
                        std::optional<int> tokenizer_model_max_length = {})
    {
        std::map<std::string, ov::PartialShape> name_to_shape;
        for (auto&& input : model->inputs()) {
            std::string input_name = input.get_any_name();
            name_to_shape[input_name] = input.get_partial_shape();
            if (input_name == "timestep") {
                // timestep is always a single scalar step per inference.
                name_to_shape[input_name][0] = 1;
            } else if (input_name == "sample") {
                // Latent sample layout assumed NCHW: dims 0/2/3 are batch/height/width.
                if (batch_size) {
                    name_to_shape[input_name][0] = *batch_size;
                }

                if (height) {
                    name_to_shape[input_name][2] = *height;
                }

                if (width) {
                    name_to_shape[input_name][3] = *width;
                }
            } else if (input_name == "time_ids" || input_name == "text_embeds") {
                if (batch_size) {
                    name_to_shape[input_name][0] = *batch_size;
                }
            } else if (input_name == "encoder_hidden_states") {
                if (batch_size) {
                    name_to_shape[input_name][0] = *batch_size;
                }

                // dim 1 is the token sequence length from the tokenizer/text encoder.
                if (tokenizer_model_max_length) {
                    name_to_shape[input_name][1] = *tokenizer_model_max_length;
                }
            } else if (input_name == "timestep_cond") {
                if (batch_size) {
                    name_to_shape[input_name][0] = *batch_size;
                }
            }
        }

        model->reshape(name_to_shape);
    }
};
|
||
} // namespace genai | ||
} // namespace ov |
Oops, something went wrong.