diff --git a/README.md b/README.md index 8af1a64..18049e2 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,40 @@ English | [简体中文](https://github.com/cnblogs/dashscope-sdk/blob/main/README.zh-Hans.md) +# Cnblogs.DashScopeSDK + [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.AI?style=flat&logo=nuget&label=Cnblogs.DashScope.AI)](https://www.nuget.org/packages/Cnblogs.DashScope.AI) [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.Sdk?style=flat&logo=nuget&label=Cnblogs.DashScope.Sdk&link=https%3A%2F%2Fwww.nuget.org%2Fpackages%2FCnblogs.DashScope.Sdk)](https://www.nuget.org/packages/Cnblogs.DashScope.Sdk) [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.AspNetCore?style=flat&logo=nuget&label=Cnblogs.DashScope.AspNetCore&link=https%3A%2F%2Fwww.nuget.org%2Fpackages%2FCnblogs.DashScope.AspNetCore)](https://www.nuget.org/packages/Cnblogs.DashScope.AspNetCore) -# DashScope SDK for .NET - -An unofficial DashScope SDK maintained by Cnblogs. - -**Warning**: this project is under active development, **Breaking Changes** may introduced without notice or major version change. Make sure you read the Release Notes before upgrading. +A non-official DashScope (Bailian) service SDK maintained by Cnblogs. -# Quick Start +**Note:** This project is actively under development. Breaking changes may occur even in minor versions. Please review the Release Notes before upgrading. -## Using `Microsoft.Extensions.AI` +## Quick Start -Install `Cnblogs.DashScope.AI` Package +### Using `Microsoft.Extensions.AI` Interface +Install NuGet package `Cnblogs.DashScope.AI` ```csharp var client = new DashScopeClient("your-api-key").AsChatClient("qwen-max"); var completion = await client.CompleteAsync("hello"); Console.WriteLine(completion) ``` -## Console App - -Install `Cnblogs.DashScope.Sdk` package. 
+### Console Application +Install NuGet package `Cnblogs.DashScope.Sdk` ```csharp var client = new DashScopeClient("your-api-key"); var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); -// or pass the model name string directly. +// Or use model name string // var completion = await client.GetQWenCompletionAsync("qwen-max", prompt); Console.WriteLine(completion.Output.Text); ``` -## ASP.NET Core +### ASP.NET Core Application -Install the Cnblogs.DashScope.AspNetCore package. +Install NuGet package `Cnblogs.DashScope.AspNetCore` `Program.cs` ```csharp @@ -44,86 +42,42 @@ builder.AddDashScopeClient(builder.Configuration); ``` `appsettings.json` + ```json { "DashScope": { - "ApiKey": "your-api-key", + "ApiKey": "your-api-key" } } ``` -`Usage` +Application class: + ```csharp public class YourService(IDashScopeClient client) { public async Task CompletePromptAsync(string prompt) { - var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); - return completion.Output.Text; + var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); + return completion.Output.Text; } } ``` +## Supported APIs +- [Chat](#Chat) - QWen3, DeepSeek, etc. Supports reasoning, tool calling, web search, translation +- [Multimodal](#multimodal) - QWen-VL, QVQ, etc. Supports reasoning, visual understanding, OCR, audio understanding +- [Text-to-Speech (TTS)](#Text-to-Speech) - CosyVoice, Sambert +- [Image Generation](#image-generation) - Wanx2.1 (text-to-image, portrait style transfer) +- [Application Call](#application-call) +- [Text Vectorization](#text-vectorization) -# Supported APIs -- Text Embedding API - `GetTextEmbeddingsAsync()` -- Text Generation API(qwen-turbo, qwen-max, etc.) 
- `GetQWenCompletionAsync()` and `GetQWenCompletionStreamAsync()` -- DeepSeek Models - `GetDeepSeekCompletionAsync()` and `GetDeepSeekCompletionStreamAsync()` -- BaiChuan Models - Use `GetBaiChuanTextCompletionAsync()` -- LLaMa2 Models - `GetLlama2TextCompletionAsync()` -- Multimodal Generation API(qwen-vl-max, etc.) - `GetQWenMultimodalCompletionAsync()` and `GetQWenMultimodalCompletionStreamAsync()` -- Wanx Models(Image generation, background generation, etc) - - Image Synthesis - `CreateWanxImageSynthesisTaskAsync()` and `GetWanxImageSynthesisTaskAsync()` - - Image Generation - `CreateWanxImageGenerationTaskAsync()` and `GetWanxImageGenerationTaskAsync()` - - Background Image Generation - `CreateWanxBackgroundGenerationTaskAsync()` and `GetWanxBackgroundGenerationTaskAsync()` -- File API that used by Qwen-Long - `UploadFileAsync()` and `DeleteFileAsync` -- Application call - `GetApplicationResponseAsync()` and `GetApplicationResponseStreamAsync()` +### Chat -# Examples +Use `GetTextCompletionAsync`/`GetTextCompletionStreamAsync` for direct text generation. +For QWen and DeepSeek, use shortcuts: `GetQWenChatCompletionAsync`/`GetDeepSeekChatCompletionAsync` -Visit [snapshots](./test/Cnblogs.DashScope.Tests.Shared/Utils/Snapshots.cs) for calling samples. - -Visit [tests](./test/Cnblogs.DashScope.Sdk.UnitTests) for more usage of each api. - -## General Text Completion API - -Use `client.GetTextCompletionAsync` and `client.GetTextCompletionStreamAsync` to access text generation api directly. - -```csharp -var completion = await dashScopeClient.GetTextCompletionAsync( - new ModelRequest - { - Model = "your-model-name", - Input = new TextGenerationInput { Prompt = prompt }, - Parameters = new TextGenerationParameters() - { - // control parameters as you wish. 
- EnableSearch = true - } - }); -var completions = dashScopeClient.GetTextCompletionStreamAsync( - new ModelRequest - { - Model = "your-model-name", - Input = new TextGenerationInput { Messages = [TextChatMessage.System("you are a helpful assistant"), TextChatMessage.User("How are you?")] }, - Parameters = new TextGenerationParameters() - { - // control parameters as you wish. - EnableSearch = true, - IncreamentalOutput = true - } - }); -``` - -## Single Text Completion - -```csharp -var prompt = "hello" -var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); -Console.WriteLine(completion.Output.Text); -``` - -## Multi-round chat +[Official Documentation](https://help.aliyun.com/zh/model-studio/user-guide/text-generation/) ```csharp var history = new List @@ -140,145 +94,191 @@ var completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenMax, histor Console.WriteLine(completion.Output.Choices[0].Message.Content); // The number is 42 ``` -## Reasoning - -Use `completion.Output.Choices![0].Message.ReasoningContent` to access the thoughts from reasoning model. +#### Reasoning +Access model thoughts via `ReasoningContent` property ```csharp -var history = new List +var history = new List { - ChatMessage.User("Calculate 1+1") + TextChatMessage.User("Calculate 1+1") }; var completion = await client.GetDeepSeekChatCompletionAsync(DeepSeekLlm.DeepSeekR1, history); Console.WriteLine(completion.Output.Choices[0]!.Message.ReasoningContent); ``` - -### QWen3 - -Use `TextGenerationParameters.EnableThinking` to toggle reasoning. 
- +For QWen3 models, enable reasoning with `TextGenerationParameters.EnableThinking` ```csharp var stream = dashScopeClient - .GetQWenChatStreamAsync( - QWenLlm.QWenPlusLatest, - history, - new TextGenerationParameters - { - IncrementalOutput = true, - ResultFormat = ResultFormats.Message, - EnableThinking = true - }); + .GetQWenChatStreamAsync( + QWenLlm.QWenPlusLatest, + history, + new TextGenerationParameters + { + IncrementalOutput = true, + ResultFormat = ResultFormats.Message, + EnableThinking = true + }); ``` -## Function Call - -Creates a function with parameters - +#### Tool Calling +Define a function for model to use: ```csharp string GetCurrentWeather(GetCurrentWeatherParameters parameters) { - // actual implementation should be different. - return "Sunny, 14" + parameters.Unit switch - { - TemperatureUnit.Celsius => "℃", - TemperatureUnit.Fahrenheit => "℉" - }; + return "Sunny"; } - public record GetCurrentWeatherParameters( [property: Required] - [property: Description("The city and state, e.g. San Francisco, CA")] + [property: Description("City and state, e.g. San Francisco, CA")] string Location, [property: JsonConverter(typeof(EnumStringConverter))] TemperatureUnit Unit = TemperatureUnit.Celsius); - -public enum TemperatureUnit -{ - Celsius, - Fahrenheit -} +public enum TemperatureUnit { Celsius, Fahrenheit } ``` - -Append tool information to chat messages (Here we use `JsonSchema.NET` to generate JSON Schema). - +Invoke with tool definitions. 
We use `JsonSchema.Net` in this example; you can use any other library to generate the JSON schema. ```csharp -var tools = new List() +var tools = new List { new( ToolTypes.Function, new FunctionDefinition( nameof(GetCurrentWeather), - "Get the weather abount given location", + "Get current weather", new JsonSchemaBuilder().FromType().Build())) }; +var history = new List { ChatMessage.User("What's the weather in CA?") }; +var parameters = new TextGenerationParameters { ResultFormat = ResultFormats.Message, Tools = tools }; -var history = new List -{ - ChatMessage.User("What is the weather today in C.A?") -}; - -var parameters = new TextGenerationParamters() -{ - ResultFormat = ResultFormats.Message, - Tools = tools -}; - -// send question with available tools. +// request model var completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenMax, history, parameters); -history.Add(completion.Output.Choice[0].Message); - -// model responding with tool calls. Console.WriteLine(completion.Output.Choice[0].Message.ToolCalls[0].Function.Name); // GetCurrentWeather +history.Add(completion.Output.Choice[0].Message); -// calling tool that model requests and append result into history. -var result = GetCurrentWeather(JsonSerializer.Deserialize(completion.Output.Choice[0].Message.ToolCalls[0].Function.Arguments)); -history.Add(ChatMessage.Tool(result, nameof(GetCurrentWeather))); +// calls tool +var result = GetCurrentWeather(new() { Location = "CA" }); +history.Add(new("tool", result, nameof(GetCurrentWeather))); -// get back answers. 
+// Get final answer completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenMax, history, parameters); -Console.WriteLine(completion.Output.Choice[0].Message.Content); +Console.WriteLine(completion.Output.Choices[0].Message.Content); // "Current weather in California: Sunny" +``` +#### File Upload (Long Context Models) +For Qwen-Long models: +```csharp +var file = new FileInfo("test.txt"); +var uploadedFile = await dashScopeClient.UploadFileAsync(file.OpenRead(), file.Name); +var history = new List { ChatMessage.File(uploadedFile.Id) }; +var completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenLong, history); +Console.WriteLine(completion.Output.Choices[0].Message.Content); +// Cleanup +await dashScopeClient.DeleteFileAsync(uploadedFile.Id); ``` +### Multimodal +Use `GetMultimodalGenerationAsync`/`GetMultimodalGenerationStreamAsync` +[Official Documentation](https://help.aliyun.com/zh/model-studio/multimodal) -Append the tool calling result with `tool` role, then model will generate answers based on tool calling result. 
+```csharp +var image = await File.ReadAllBytesAsync("Lenna.jpg"); +var response = dashScopeClient.GetMultimodalGenerationStreamAsync( + new ModelRequest() + { + Model = "qvq-plus", + Input = new MultimodalInput() + { + Messages = + [ + MultimodalMessage.User( + [ + MultimodalMessageContent.ImageContent(image, "image/jpeg"), + MultimodalMessageContent.TextContent("她是谁?") + ]) + ] + }, + Parameters = new MultimodalParameters { IncrementalOutput = true, VlHighResolutionImages = false } + }); + +// output +var reasoning = false; +await foreach (var modelResponse in response) +{ + var choice = modelResponse.Output.Choices.FirstOrDefault(); + if (choice != null) + { + if (choice.FinishReason != "null") + { + break; + } + if (string.IsNullOrEmpty(choice.Message.ReasoningContent) == false) + { + if (reasoning == false) + { + reasoning = true; + Console.WriteLine(""); + } -## QWen-Long with files + Console.Write(choice.Message.ReasoningContent); + continue; + } -Upload file first. + if (reasoning) + { + reasoning = false; + Console.WriteLine(""); + } -```csharp -var file = new FileInfo("test.txt"); -var uploadedFile = await dashScopeClient.UploadFileAsync(file.OpenRead(), file.Name); + Console.Write(choice.Message.Content[0].Text); + } +} ``` +### Text-to-Speech + +Create a speech synthesis session using `dashScopeClient.CreateSpeechSynthesizerSocketSessionAsync()`. -Using uploaded file id in messages. +Note: Use the using statement to automatically dispose the session, or manually call Dispose() to release resources. Avoid reusing sessions. 
+Create a synthesis session: ```csharp -var history = new List +using var tts = await dashScopeClient.CreateSpeechSynthesizerSocketSessionAsync("cosyvoice-v2"); +var taskId = await tts.RunTaskAsync(new SpeechSynthesizerParameters { Voice = "longxiaochun_v2", Format = "mp3" }); +await tts.ContinueTaskAsync(taskId, "Cnblogs"); +await tts.ContinueTaskAsync(taskId, "Code changes the world"); +await tts.FinishTaskAsync(taskId); +var file = new FileInfo("tts.mp3"); +using var stream = file.OpenWrite(); +await foreach (var b in tts.GetAudioAsync()) { - ChatMessage.File(uploadedFile.Id), // use array for multiple files, e.g. [file1.Id, file2.Id] - ChatMessage.User("Summarize the content of file.") + stream.WriteByte(b); } -var parameters = new TextGenerationParameters() -{ - ResultFormat = ResultFormats.Message -}; -var completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenLong, history, parameters); -Console.WriteLine(completion.Output.Choices[0].Message.Content); +Console.WriteLine($"Audio saved to {file.FullName}"); ``` - -Delete file if needed - +### Image Generation +#### Text-to-Image +Use shortcuts for Wanx models: ```csharp -var deletionResult = await dashScopeClient.DeleteFileAsync(uploadedFile.Id); +var task = await dashScopeClient.CreateWanxImageSynthesisTaskAsync( + WanxModel.WanxV21Turbo, + "A futuristic cityscape at sunset", + new ImageSynthesisParameters { Style = ImageStyles.OilPainting }); +// Pull status +while (true) +{ + var result = await dashScopeClient.GetWanxImageSynthesisTaskAsync(task.TaskId); + if (result.Output.TaskStatus == DashScopeTaskStatus.Succeeded) + { + Console.WriteLine($"Image URL: {result.Output.Results[0].Url}"); + break; + } + await Task.Delay(500); +} ``` +#### Portrait Style Transfer +Use `CreateWanxImageGenerationTaskAsync` and `GetWanxImageGenerationTaskAsync` -## Application call - -Use `GetApplicationResponseAsync` to call an application. 
+#### Background Generation -Use `GetApplicationResponseStreamAsync` for streaming output. +Use `CreateWanxBackgroundGenerationTaskAsync` and `GetWanxBackgroundGenerationTaskAsync` +### Application Call ```csharp var request = new ApplicationRequest() @@ -300,8 +300,7 @@ var request = var response = await client.GetApplicationResponseAsync("your-application-id", request); Console.WriteLine(response.Output.Text); ``` - -`ApplicationRequest` use an `Dictionary` as `BizParams` by default. +`ApplicationRequest` uses `Dictionary` as the default type for `BizParams`. ```csharp var request = @@ -319,14 +318,13 @@ var request = var response = await client.GetApplicationResponseAsync("your-application-id", request); Console.WriteLine(response.Output.Text); ``` - -You can use the generic version `ApplicationRequest` for strong-typed `BizParams`. But keep in mind that client use `snake_case` by default when doing json serialization, you may need to use `[JsonPropertyName("camelCase")]` for other type of naming policy. +For strong typing support, you can use the generic class `ApplicationRequest`. +Note that the SDK uses `snake_case` for JSON serialization. If your application uses different naming conventions, manually specify the serialized property names using `[JsonPropertyName("camelCase")]`. 
```csharp public record TestApplicationBizParam( [property: JsonPropertyName("sourceCode")] string SourceCode); - var request = new ApplicationRequest() { @@ -340,3 +338,18 @@ var response = await client.GetApplicationResponseAsync("your-application-id", r Console.WriteLine(response.Output.Text); ``` +### Text Vectorization + +```csharp +var text = "Sample text for embedding"; +var response = await dashScopeClient.GetTextEmbeddingsAsync( + TextEmbeddingModel.TextEmbeddingV4, + [text], + new TextEmbeddingParameters { Dimension = 512 }); +var embedding = response.Output.Embeddings.First().Embedding; +Console.WriteLine($"Embedding vector length: {embedding.Length}"); +``` + +See [Snapshot Files](./test/Cnblogs.DashScope.Tests.Shared/Utils/Snapshots.cs) for API parameter examples. + +Review [Tests](./test) for comprehensive usage examples. diff --git a/README.zh-Hans.md b/README.zh-Hans.md index b2545c3..aea5642 100644 --- a/README.zh-Hans.md +++ b/README.zh-Hans.md @@ -1,18 +1,18 @@ [English](https://github.com/cnblogs/dashscope-sdk/blob/main/README.md) | 简体中文 +# Cnblogs.DashScopeSDK + [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.AI?style=flat&logo=nuget&label=Cnblogs.DashScope.AI)](https://www.nuget.org/packages/Cnblogs.DashScope.AI) [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.Sdk?style=flat&logo=nuget&label=Cnblogs.DashScope.Sdk&link=https%3A%2F%2Fwww.nuget.org%2Fpackages%2FCnblogs.DashScope.Sdk)](https://www.nuget.org/packages/Cnblogs.DashScope.Sdk) [![NuGet Version](https://img.shields.io/nuget/v/Cnblogs.DashScope.AspNetCore?style=flat&logo=nuget&label=Cnblogs.DashScope.AspNetCore&link=https%3A%2F%2Fwww.nuget.org%2Fpackages%2FCnblogs.DashScope.AspNetCore)](https://www.nuget.org/packages/Cnblogs.DashScope.AspNetCore) -# Cnblogs.DashScopeSDK - 由博客园维护并使用的非官方灵积(百炼)服务 SDK。 使用前注意:当前项目正在积极开发中,小版本也可能包含破坏性更改,升级前请查看对应版本 Release Note 进行迁移。 -# 快速开始 +## 快速开始 -## 使用 `Microsoft.Extensions.AI` 接口 +### 使用 `Microsoft.Extensions.AI` 接口 安装 
NuGet 包 `Cnblogs.DashScope.AI` @@ -22,7 +22,7 @@ var completion = await client.CompleteAsync("hello"); Console.WriteLine(completion) ``` -## 控制台应用 +### 控制台应用 安装 NuGet 包 `Cnblogs.DashScope.Sdk`。 @@ -34,7 +34,7 @@ var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); Console.WriteLine(completion.Output.Text); ``` -## ASP.NET Core 应用 +### ASP.NET Core 应用 安装 NuGet 包 `Cnblogs.DashScope.AspNetCore`。 @@ -64,70 +64,22 @@ public class YourService(IDashScopeClient client) } ``` -# 支持的 API +## 支持的 API -- 通用文本向量 - `GetTextEmbeddingsAsync()` -- 通义千问(`qwen-turbo`, `qwen-max` 等) - `GetQWenCompletionAsync()` 和 `GetQWenCompletionStreamAsync()` -- DeepSeek 系列模型(`deepseek-r1`,`deepseek-v3` 等) - `GetDeepSeekChatCompletionAsync()` 和 `GetDeepSeekChatCompletionStreamAsync()` -- 百川开源大模型 - `GetBaiChuanTextCompletionAsync()` -- LLaMa2 大语言模型 - `GetLlama2TextCompletionAsync()` -- 通义千问 VL 和通义千问 Audio(`qwen-vl-max`, `qwen-audio`) - `GetQWenMultimodalCompletionAsync()` 和 `GetQWenMultimodalCompletionStreamAsync()` -- 通义万相系列 - - 文生图 - `CreateWanxImageSynthesisTaskAsync()` 和 `GetWanxImageSynthesisTaskAsync()` - - 人像风格重绘 - `CreateWanxImageGenerationTaskAsync()` 和 `GetWanxImageGenerationTaskAsync()` - - 图像背景生成 - `CreateWanxBackgroundGenerationTaskAsync()` 和 `GetWanxBackgroundGenerationTaskAsync()` -- 适用于 QWen-Long 的文件 API `UploadFileAsync()` 和 `DeleteFileAsync` -- 应用调用 `GetApplicationResponseAsync` 和 `GetApplicationResponseStreamAsync()` -- 其他使用相同 Endpoint 的模型 +- [对话](#对话) - QWen3, DeepSeek 等,支持推理/工具调用/网络搜索/翻译等场景 +- [多模态](#多模态) - QWen-VL,QVQ 等,支持推理/视觉理解/OCR/音频理解等场景 +- [语音合成](#语音合成) - CosyVoice,Sambert 等,支持 TTS 等应用场景 +- [图像生成](#图像生成) - wanx2.1 等,支持文生图,人像风格重绘等应用场景 +- [应用调用](#应用调用) +- [文本向量](#文本向量) -# 示例 - -查看 [快照文件](./test/Cnblogs.DashScope.Tests.Shared/Utils/Snapshots.cs) 获得 API 调用参数示例. 
- -查看 [测试](./test) 获得更多 API 使用示例。 - -## 文本生成 +### 对话 使用 `dashScopeClient.GetTextCompletionAsync` 和 `dashScopeClient.GetTextCompletionStreamAsync` 来直接访问文本生成接口。 -相关文档:https://help.aliyun.com/zh/model-studio/user-guide/text-generation/ - -```csharp -var completion = await dashScopeClient.GetTextCompletionAsync( - new ModelRequest - { - Model = "your-model-name", - Input = new TextGenerationInput { Prompt = prompt }, - Parameters = new TextGenerationParameters() - { - // control parameters as you wish. - EnableSearch = true - } - }); - -var completions = dashScopeClient.GetTextCompletionStreamAsync( - new ModelRequest - { - Model = "your-model-name", - Input = new TextGenerationInput { Messages = [TextChatMessage.System("you are a helpful assistant"), TextChatMessage.User("How are you?")] }, - Parameters = new TextGenerationParameters() - { - // control parameters as you wish. - EnableSearch = true, - IncreamentalOutput = true - } - }); -``` - -## 单轮对话 +针对通义千问和 DeepSeek,我们提供了快捷方法进行调用: `GetQWenChatCompletionAsync` /`GetDeepSeekChatCompletionAsync` -```csharp -var prompt = "你好" -var completion = await client.GetQWenCompletionAsync(QWenLlm.QWenMax, prompt); -Console.WriteLine(completion.Output.Text); -``` - -## 多轮对话 +相关文档:https://help.aliyun.com/zh/model-studio/user-guide/text-generation/ ```csharp var history = new List @@ -144,7 +96,7 @@ var completion = await client.GetQWenChatCompletionAsync(QWenLlm.QWenMax, histor Console.WriteLine(completion.Output.Choices[0].Message.Content); // The number is 42 ``` -## 推理 +#### 推理 使用推理模型时,模型的思考过程可以通过 `ReasoningContent` 属性获取。 @@ -157,9 +109,7 @@ var completion = await client.GetDeepSeekChatCompletionAsync(DeepSeekLlm.DeepSee Console.WriteLine(completion.Output.Choices[0]!.Message.ReasoningContent); ``` -### QWen3 - -使用 `TextGenerationParameters.EnableThinking` 决定是否使用模型的推理能力。 +对于支持的模型(例如 qwen3),可以使用 `TextGenerationParameters.EnableThinking` 决定是否使用模型的推理能力。 ```csharp var stream = dashScopeClient @@ -174,7 +124,7 @@ var stream = 
dashScopeClient }); ``` -## 工具调用 +#### 工具调用 创建一个可供模型使用的方法。 @@ -241,9 +191,9 @@ Console.WriteLine(completion.Output.Choice[0].Message.Content) // 现在浙江 当模型认为应当调用工具时,返回消息中 `ToolCalls` 会提供调用的详情,本地在调用完成后可以把结果以 `tool` 角色返回。 -## 上传文件(QWen-Long) +#### 上传文件(qwen-long) -需要先提前将文件上传到 DashScope 来获得 Id。 +使用长上下文模型时,需要先提前将文件上传到 DashScope 来获得 Id。 ```csharp var file = new FileInfo("test.txt"); @@ -272,7 +222,162 @@ Console.WriteLine(completion.Output.Choices[0].Message.Content); var deletionResult = await dashScopeClient.DeleteFileAsync(uploadedFile.Id); ``` -## 应用调用 +### 多模态 + +使用 `dashScopeClient.GetMultimodalGenerationAsync` 和 `dashScopeClient.GetMultimodalGenerationStreamAsync` 来访问多模态文本生成接口。 + +相关文档:[多模态_大模型服务平台百炼(Model Studio)-阿里云帮助中心](https://help.aliyun.com/zh/model-studio/multimodal) + +#### 视觉理解/推理 + +使用 `MultimodalMessage.User()` 可以快速创建对应角色的消息。 + +媒体内容可以通过公网 URL 或者 `byte[]` 传入。 + +```csharp +var image = await File.ReadAllBytesAsync("Lenna.jpg"); +var response = dashScopeClient.GetMultimodalGenerationStreamAsync( + new ModelRequest() + { + Model = "qvq-plus", + Input = new MultimodalInput() + { + Messages = + [ + MultimodalMessage.User( + [ + MultimodalMessageContent.ImageContent(image, "image/jpeg"), + MultimodalMessageContent.TextContent("她是谁?") + ]) + ] + }, + Parameters = new MultimodalParameters { IncrementalOutput = true, VlHighResolutionImages = false } + }); + +// output +var reasoning = false; +await foreach (var modelResponse in response) +{ + var choice = modelResponse.Output.Choices.FirstOrDefault(); + if (choice != null) + { + if (choice.FinishReason != "null") + { + break; + } + + if (string.IsNullOrEmpty(choice.Message.ReasoningContent) == false) + { + if (reasoning == false) + { + reasoning = true; + Console.WriteLine(""); + } + + Console.Write(choice.Message.ReasoningContent); + continue; + } + + if (reasoning) + { + reasoning = false; + Console.WriteLine(""); + } + + Console.Write(choice.Message.Content[0].Text); + } +} +``` + +### 语音合成 + +通过 
`dashScopeClient.CreateSpeechSynthesizerSocketSessionAsync()` 来创建一个语音合成会话。 + +**注意:使用 using 语句来自动释放会话,或者手动 Dispose 会话,尽量不要重用会话。** + +相关文档:[语音合成-CosyVoice_大模型服务平台百炼(Model Studio)-阿里云帮助中心](https://help.aliyun.com/zh/model-studio/cosyvoice-large-model-for-speech-synthesis) + +```csharp +using var tts = await dashScopeClient.CreateSpeechSynthesizerSocketSessionAsync("cosyvoice-v2"); +var taskId = await tts.RunTaskAsync( + new SpeechSynthesizerParameters { Voice = "longxiaochun_v2", Format = "mp3" }); +await tts.ContinueTaskAsync(taskId, "博客园"); +await tts.ContinueTaskAsync(taskId, "代码改变世界"); +await tts.FinishTaskAsync(taskId); +var file = new FileInfo("tts.mp3"); +using var stream = file.OpenWrite(); +await foreach (var b in tts.GetAudioAsync()) +{ + stream.WriteByte(b); +} + +stream.Close(); + +var tokenUsage = 0; +await foreach (var message in tts.GetMessagesAsync()) +{ + if (message.Payload.Usage?.Characters > tokenUsage) + { + tokenUsage = message.Payload.Usage.Characters; + } +} + +Console.WriteLine($"audio saved to {file.FullName}, token usage: {tokenUsage}"); +break; +``` + +### 图像生成 + +#### 文生图 + +我们针对通义万相提供了快捷 API `dashScopeClient.CreateWanxImageSynthesisTaskAsync()` 和 `GetWanxImageSynthesisTaskAsync()`。 + +图片生成需要数秒到数十秒不等,对于 HTTP 请求来说太长,需要通过任务方式生成。 + +先使用 `CreateWanxImageSynthesisTaskAsync()` 创建任务,再轮询 `GetWanxImageSynthesisTaskAsync()` 检查任务完成状态。 + +相关文档:[通义万相2.1文生图V2版API参考_大模型服务平台百炼(Model Studio)-阿里云帮助中心](https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference) + +```csharp +var prompt = Console.ReadLine(); +var task = await dashScopeClient.CreateWanxImageSynthesisTaskAsync( + WanxModel.WanxV21Turbo, + prompt, + null, + new ImageSynthesisParameters { Style = ImageStyles.OilPainting }); +Console.WriteLine($"Task({task.TaskId}) submitted, checking status..."); +var watch = Stopwatch.StartNew(); +while (watch.Elapsed.TotalSeconds < 120) +{ + var result = await dashScopeClient.GetWanxImageSynthesisTaskAsync(task.TaskId); + 
Console.WriteLine($"{watch.ElapsedMilliseconds}ms - Status: {result.Output.TaskStatus}"); + if (result.Output.TaskStatus == DashScopeTaskStatus.Succeeded) + { + Console.WriteLine($"Image generation finished, URL: {result.Output.Results![0].Url}"); + return; + } + + if (result.Output.TaskStatus == DashScopeTaskStatus.Failed) + { + Console.WriteLine($"Image generation failed, error message: {result.Output.Message}"); + return; + } + + await Task.Delay(500); +} + +Console.WriteLine($"Task timout, taskId: {task.TaskId}"); +``` + +#### 人像风格重绘和图像背景生成 + +与文生图类似,先创建任务,再轮询状态。 + +人像风格重绘 - `CreateWanxImageGenerationTaskAsync` 和 `GetWanxImageGenerationTaskAsync` + +图像背景生成 - `CreateWanxBackgroundGenerationTaskAsync` 和 `GetWanxBackgroundGenerationTaskAsync` + +### 应用调用 `GetApplicationResponseAsync` 用于进行应用调用。 @@ -339,3 +444,25 @@ var request = var response = await client.GetApplicationResponseAsync("your-application-id", request); Console.WriteLine(response.Output.Text); ``` + +### 文本向量 + +使用 `GetTextEmbeddingsAsync` 来调用文本向量接口。 + +相关文档:[通用文本向量同步接口API详情_大模型服务平台百炼(Model Studio)-阿里云帮助中心](https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api) + +```csharp +var text = Console.ReadLine(); +var response = await dashScopeClient.GetTextEmbeddingsAsync( + TextEmbeddingModel.TextEmbeddingV4, + [text], + new TextEmbeddingParameters() { Dimension = 512, }); +var array = response.Output.Embeddings.First().Embedding; +Console.WriteLine("Embedding"); +Console.WriteLine(string.Join('\n', array)); +Console.WriteLine($"Token usage: {response.Usage?.TotalTokens}"); +``` + +查看 [快照文件](./test/Cnblogs.DashScope.Tests.Shared/Utils/Snapshots.cs) 获得 API 调用参数示例. 
+ +查看 [测试](./test) 获得更多 API 使用示例。 diff --git a/sample/Cnblogs.DashScope.Sample/Cnblogs.DashScope.Sample.csproj b/sample/Cnblogs.DashScope.Sample/Cnblogs.DashScope.Sample.csproj index 76001b4..fd1bb50 100644 --- a/sample/Cnblogs.DashScope.Sample/Cnblogs.DashScope.Sample.csproj +++ b/sample/Cnblogs.DashScope.Sample/Cnblogs.DashScope.Sample.csproj @@ -17,6 +17,9 @@ Always + + PreserveNewest + diff --git a/sample/Cnblogs.DashScope.Sample/Lenna.jpg b/sample/Cnblogs.DashScope.Sample/Lenna.jpg new file mode 100644 index 0000000..4030eed Binary files /dev/null and b/sample/Cnblogs.DashScope.Sample/Lenna.jpg differ diff --git a/sample/Cnblogs.DashScope.Sample/Program.cs b/sample/Cnblogs.DashScope.Sample/Program.cs index 6851d01..a0aa669 100644 --- a/sample/Cnblogs.DashScope.Sample/Program.cs +++ b/sample/Cnblogs.DashScope.Sample/Program.cs @@ -1,9 +1,12 @@ -using System.Text; +using System.Diagnostics; +using System.Text; using System.Text.Json; using Cnblogs.DashScope.Core; using Cnblogs.DashScope.Sample; using Cnblogs.DashScope.Sdk; using Cnblogs.DashScope.Sdk.QWen; +using Cnblogs.DashScope.Sdk.TextEmbedding; +using Cnblogs.DashScope.Sdk.Wanx; using Json.Schema; using Json.Schema.Generation; using Microsoft.Extensions.AI; @@ -48,9 +51,15 @@ case SampleType.ChatCompletionWithTool: await ChatWithToolsAsync(); break; + case SampleType.MultimodalCompletion: + await ChatWithImageAsync(); + break; case SampleType.ChatCompletionWithFiles: await ChatWithFilesAsync(); break; + case SampleType.Text2Image: + await Text2ImageAsync(); + break; case SampleType.MicrosoftExtensionsAi: await ChatWithMicrosoftExtensions(); break; @@ -73,13 +82,13 @@ await tts.ContinueTaskAsync(taskId, "代码改变世界"); await tts.FinishTaskAsync(taskId); var file = new FileInfo("tts.mp3"); - var writer = file.OpenWrite(); + await using var stream = file.OpenWrite(); await foreach (var b in tts.GetAudioAsync()) { - writer.WriteByte(b); + stream.WriteByte(b); } - writer.Close(); + stream.Close(); var tokenUsage = 0; 
await foreach (var message in tts.GetMessagesAsync()) @@ -93,6 +102,25 @@ Console.WriteLine($"audio saved to {file.FullName}, token usage: {tokenUsage}"); break; } + + case SampleType.TextEmbedding: + Console.Write("text> "); + var text = Console.ReadLine(); + if (string.IsNullOrEmpty(text)) + { + text = "Coding changes world"; + Console.WriteLine($"using default text: {text}"); + } + + var response = await dashScopeClient.GetTextEmbeddingsAsync( + TextEmbeddingModel.TextEmbeddingV3, + [text], + new TextEmbeddingParameters() { Dimension = 512, }); + var array = response.Output.Embeddings.First().Embedding; + Console.WriteLine("Embedding"); + Console.WriteLine(string.Join('\n', array)); + Console.WriteLine($"Token usage: {response.Usage?.TotalTokens}"); + break; } return; @@ -160,6 +188,60 @@ async Task ChatStreamAsync() // ReSharper disable once FunctionNeverReturns } +async Task ChatWithImageAsync() +{ + var image = await File.ReadAllBytesAsync("Lenna.jpg"); + var response = dashScopeClient.GetMultimodalGenerationStreamAsync( + new ModelRequest() + { + Model = "qvq-plus", + Input = new MultimodalInput() + { + Messages = + [ + MultimodalMessage.User( + [ + MultimodalMessageContent.ImageContent(image, "image/jpeg"), + MultimodalMessageContent.TextContent("她是谁?") + ]) + ] + }, + Parameters = new MultimodalParameters { IncrementalOutput = true, VlHighResolutionImages = false } + }); + var reasoning = false; + await foreach (var modelResponse in response) + { + var choice = modelResponse.Output.Choices.FirstOrDefault(); + if (choice != null) + { + if (choice.FinishReason != "null") + { + break; + } + + if (string.IsNullOrEmpty(choice.Message.ReasoningContent) == false) + { + if (reasoning == false) + { + reasoning = true; + Console.WriteLine(""); + } + + Console.Write(choice.Message.ReasoningContent); + continue; + } + + if (reasoning) + { + reasoning = false; + Console.WriteLine(""); + } + + Console.Write(choice.Message.Content[0].Text); + } + } +} + async Task 
ChatWithFilesAsync() { var history = new List(); @@ -258,6 +340,45 @@ async Task ChatWithMicrosoftExtensions() Console.WriteLine(JsonSerializer.Serialize(response, serializerOptions)); } +async Task Text2ImageAsync() +{ + Console.Write("Prompt> "); + var prompt = Console.ReadLine(); + if (string.IsNullOrEmpty(prompt)) + { + Console.WriteLine("Using sample prompt"); + prompt = "A fluffy cat"; + } + + var task = await dashScopeClient.CreateWanxImageSynthesisTaskAsync( + WanxModel.WanxV21Turbo, + prompt, + null, + new ImageSynthesisParameters { Style = ImageStyles.OilPainting }); + Console.WriteLine($"Task({task.TaskId}) submitted, checking status..."); + var watch = Stopwatch.StartNew(); + while (watch.Elapsed.TotalSeconds < 120) + { + var result = await dashScopeClient.GetWanxImageSynthesisTaskAsync(task.TaskId); + Console.WriteLine($"{watch.ElapsedMilliseconds}ms - Status: {result.Output.TaskStatus}"); + if (result.Output.TaskStatus == DashScopeTaskStatus.Succeeded) + { + Console.WriteLine($"Image generation finished, URL: {result.Output.Results![0].Url}"); + return; + } + + if (result.Output.TaskStatus == DashScopeTaskStatus.Failed) + { + Console.WriteLine($"Image generation failed, error message: {result.Output.Message}"); + return; + } + + await Task.Delay(500); + } + + Console.WriteLine($"Task timout, taskId: {task.TaskId}"); +} + async Task ApplicationCallAsync(string applicationId, string prompt) { var request = new ApplicationRequest { Input = new ApplicationInput { Prompt = prompt } }; diff --git a/sample/Cnblogs.DashScope.Sample/SampleType.cs b/sample/Cnblogs.DashScope.Sample/SampleType.cs index 138ed9a..c78d94a 100644 --- a/sample/Cnblogs.DashScope.Sample/SampleType.cs +++ b/sample/Cnblogs.DashScope.Sample/SampleType.cs @@ -12,6 +12,10 @@ public enum SampleType ChatCompletionWithFiles, + MultimodalCompletion, + + Text2Image, + MicrosoftExtensionsAi, MicrosoftExtensionsAiToolCall, @@ -19,4 +23,6 @@ public enum SampleType ApplicationCall, TextToSpeech, + + 
TextEmbedding } diff --git a/sample/Cnblogs.DashScope.Sample/SampleTypeDescriptor.cs b/sample/Cnblogs.DashScope.Sample/SampleTypeDescriptor.cs index a6d1b94..2cd398a 100644 --- a/sample/Cnblogs.DashScope.Sample/SampleTypeDescriptor.cs +++ b/sample/Cnblogs.DashScope.Sample/SampleTypeDescriptor.cs @@ -11,10 +11,13 @@ public static string GetDescription(this SampleType sampleType) SampleType.ChatCompletion => "Conversation between user and assistant", SampleType.ChatCompletionWithTool => "Function call sample", SampleType.ChatCompletionWithFiles => "File upload sample using qwen-long", + SampleType.MultimodalCompletion => "Multimodal completion", + SampleType.Text2Image => "Text to Image generation", SampleType.MicrosoftExtensionsAi => "Use with Microsoft.Extensions.AI", SampleType.MicrosoftExtensionsAiToolCall => "Use tool call with Microsoft.Extensions.AI interfaces", SampleType.ApplicationCall => "Call pre-defined application", SampleType.TextToSpeech => "TTS task", + SampleType.TextEmbedding => "Get text embedding", _ => throw new ArgumentOutOfRangeException(nameof(sampleType), sampleType, "Unsupported sample option") }; } diff --git a/src/Cnblogs.DashScope.Core/IImageSynthesisParameters.cs b/src/Cnblogs.DashScope.Core/IImageSynthesisParameters.cs index 5e241c6..70a3b47 100644 --- a/src/Cnblogs.DashScope.Core/IImageSynthesisParameters.cs +++ b/src/Cnblogs.DashScope.Core/IImageSynthesisParameters.cs @@ -24,4 +24,14 @@ public interface IImageSynthesisParameters /// Seed for randomizer, max at 4294967290. Once set, generated image will use seed, seed+1, seed+2, seed+3 depends on . /// public uint? Seed { get; } + + /// + /// Lets the LLM rewrite your positive prompt. Defaults to true. + /// + public bool? PromptExtend { get; } + + /// + /// Adds an AI-generated watermark in the bottom-right corner. + /// + public bool? 
Watermark { get; } } diff --git a/src/Cnblogs.DashScope.Core/ImageSynthesisParameters.cs b/src/Cnblogs.DashScope.Core/ImageSynthesisParameters.cs index 58497fe..8ca30bd 100644 --- a/src/Cnblogs.DashScope.Core/ImageSynthesisParameters.cs +++ b/src/Cnblogs.DashScope.Core/ImageSynthesisParameters.cs @@ -16,4 +16,10 @@ public class ImageSynthesisParameters : IImageSynthesisParameters /// public uint? Seed { get; set; } + + /// + public bool? PromptExtend { get; set; } + + /// + public bool? Watermark { get; set; } } diff --git a/src/Cnblogs.DashScope.Core/MultimodalMessage.cs b/src/Cnblogs.DashScope.Core/MultimodalMessage.cs index d1034b0..48bcc44 100644 --- a/src/Cnblogs.DashScope.Core/MultimodalMessage.cs +++ b/src/Cnblogs.DashScope.Core/MultimodalMessage.cs @@ -7,7 +7,11 @@ namespace Cnblogs.DashScope.Core; /// /// The role associated with this message. /// The contents of this message. -public record MultimodalMessage(string Role, IReadOnlyList Content) +/// Thoughts from the model. +public record MultimodalMessage( + string Role, + IReadOnlyList Content, + string? ReasoningContent = null) : IMessage> { /// @@ -34,9 +38,12 @@ public static MultimodalMessage System(IReadOnlyList c /// Creates an assistant message. /// /// Message contents. + /// Thoughts from the model. /// - public static MultimodalMessage Assistant(IReadOnlyList contents) + public static MultimodalMessage Assistant( + IReadOnlyList contents, + string? 
reasoningContent = null) { - return new MultimodalMessage(DashScopeRoleNames.Assistant, contents); + return new MultimodalMessage(DashScopeRoleNames.Assistant, contents, reasoningContent); } } diff --git a/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModel.cs b/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModel.cs index fa65480..abeeade 100644 --- a/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModel.cs +++ b/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModel.cs @@ -58,5 +58,25 @@ public enum QWenMultimodalModel /// /// qwen-audio-turbo-latest /// - QWenAudioTurboLatest = 11 + QWenAudioTurboLatest = 11, + + /// + /// qvq-max + /// + QvQMax = 12, + + /// + /// qvq-max-latest + /// + QvQMaxLatest = 13, + + /// + /// qvq-plus + /// + QvQPlus = 14, + + /// + /// qvq-plus-latest + /// + QvQPlusLatest = 15 } diff --git a/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModelNames.cs b/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModelNames.cs index b5bd02a..e22aeda 100644 --- a/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModelNames.cs +++ b/src/Cnblogs.DashScope.Sdk/QWenMultimodal/QWenMultimodalModelNames.cs @@ -17,6 +17,10 @@ public static string GetModelName(this QWenMultimodalModel multimodalModel) QWenMultimodalModel.QWenVlPlusLatest => "qwen-vl-plus-latest", QWenMultimodalModel.QWenVlOcrLatest => "qwen-vl-ocr-latest", QWenMultimodalModel.QWenAudioTurboLatest => "qwen-audio-turbo-latest", + QWenMultimodalModel.QvQMax => "qvq-max", + QWenMultimodalModel.QvQMaxLatest => "qvq-max-latest", + QWenMultimodalModel.QvQPlus => "qvq-plus", + QWenMultimodalModel.QvQPlusLatest => "qvq-plus-latest", _ => ThrowHelper.UnknownModelName(nameof(multimodalModel), multimodalModel) }; } diff --git a/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModel.cs b/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModel.cs index 808db24..72af726 100644 --- a/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModel.cs +++ 
b/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModel.cs @@ -19,4 +19,9 @@ public enum TextEmbeddingModel /// text-embedding-v3 /// TextEmbeddingV3 = 3, + + /// + /// text-embedding-v4 + /// + TextEmbeddingV4 = 4 } diff --git a/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModelNames.cs b/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModelNames.cs index 410c33e..21d6b7d 100644 --- a/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModelNames.cs +++ b/src/Cnblogs.DashScope.Sdk/TextEmbedding/TextEmbeddingModelNames.cs @@ -9,6 +9,7 @@ public static string GetModelName(this TextEmbeddingModel model) TextEmbeddingModel.TextEmbeddingV1 => "text-embedding-v1", TextEmbeddingModel.TextEmbeddingV2 => "text-embedding-v2", TextEmbeddingModel.TextEmbeddingV3 => "text-embedding-v3", + TextEmbeddingModel.TextEmbeddingV4 => "text-embedding-v4", _ => ThrowHelper.UnknownModelName(nameof(model), model), }; } diff --git a/src/Cnblogs.DashScope.Sdk/Wanx/WanxModel.cs b/src/Cnblogs.DashScope.Sdk/Wanx/WanxModel.cs index f278eeb..3d19fdd 100644 --- a/src/Cnblogs.DashScope.Sdk/Wanx/WanxModel.cs +++ b/src/Cnblogs.DashScope.Sdk/Wanx/WanxModel.cs @@ -8,5 +8,20 @@ public enum WanxModel /// /// wanx-v1 /// - WanxV1 = 1 + WanxV1 = 1, + + /// + /// wanx2.1-t2i-plus + /// + WanxV21Plus = 2, + + /// + /// wanx2.1-t2i-turbo + /// + WanxV21Turbo = 3, + + /// + /// wanx2.0-t2i-turbo + /// + WanxV20Turbo = 4 } diff --git a/src/Cnblogs.DashScope.Sdk/Wanx/WanxModelNames.cs b/src/Cnblogs.DashScope.Sdk/Wanx/WanxModelNames.cs index ab04555..8e04751 100644 --- a/src/Cnblogs.DashScope.Sdk/Wanx/WanxModelNames.cs +++ b/src/Cnblogs.DashScope.Sdk/Wanx/WanxModelNames.cs @@ -7,6 +7,9 @@ public static string GetModelName(this WanxModel model) return model switch { WanxModel.WanxV1 => "wanx-v1", + WanxModel.WanxV21Plus => "wanx2.1-t2i-plus", + WanxModel.WanxV21Turbo => "wanx2.1-t2i-turbo", + WanxModel.WanxV20Turbo => "wanx2.0-t2i-turbo", _ => ThrowHelper.UnknownModelName(nameof(model), model) 
}; }