diff --git a/llm/README.md b/llm/README.md index 5065ddc..150cffc 100644 --- a/llm/README.md +++ b/llm/README.md @@ -15,24 +15,38 @@ java -version 1) Download the installation package: ```bash -curl -O https://github.com/instana/otel-dc/releases/download/Release/otel-dc-llm_1.0.0_linux_amd64.tar.gz +curl -O https://github.com/instana/otel-dc/releases/download/v1.0.5/otel-dc-llm-1.0.5.tar ``` 2) Extract the package to the desired deployment location: ```bash -tar vxf otel-dc-llm_1.0.0_linux_amd64.tar.gz +tar vxf otel-dc-llm-1.0.5.tar ``` ## Configuration + +### Configure otel dc ```bash -cd otel-dc-llm-1.0.0 +cd otel-dc-llm-1.0.5 vi config/config.yaml ``` The following options are required: -- `otel.backend.url`:The OTel gRPC address of the OTel backends, for example Instana Agent (as OTel Backend): http://localhost:4317 -- `otel.service.name`:The Data Collector name, which can be any string you choose. -- `*.price.prompt.tokens.per.kilo`:The unit price per thousand prompt tokens. -- `*.price.complete.tokens.per.kilo`:The unit price per thousand complete tokens. +- `otel.agentless.mode`: The connection mode of the OTel data connector, the default mode is agentless. +- `otel.backend.url`: The gRPC endpoint of the Instana backend or Instana agent, that depends on agentless or not. +- `callback.interval`: The time interval in seconds to post data to backend or agent. +- `otel.service.name`: The Data Collector name, which can be any string that you choose. +- `otel.service.port`: The listen port of Data Collector for receiving the metrics data from the instrumented applications, the default port is 8000. + +### Configure model price +```bash +vi config/prices.properties +``` +Customize more price items by the following format: +``` +<aiSystem>.<modelId>.input=0.0 +<aiSystem>.<modelId>.output=0.0 +``` +The `<modelId>` can be set to '*' to match all modelIds within the aiSystem, but this has a lower priority than items with a specific modelId specified. 
## Run ODCL Run the Data Collector with the following command according to your current system: diff --git a/llm/build.gradle b/llm/build.gradle index 3025a2e..e87febb 100644 --- a/llm/build.gradle +++ b/llm/build.gradle @@ -4,7 +4,7 @@ plugins { } group = "com.instana.dc" -version = "1.0.2" +version = "1.0.5" sourceCompatibility = 11 targetCompatibility = 11 diff --git a/llm/config/config.yaml b/llm/config/config.yaml index a52db23..5662413 100644 --- a/llm/config/config.yaml +++ b/llm/config/config.yaml @@ -7,12 +7,5 @@ instances: callback.interval: 10 otel.service.name: DC1 otel.service.port: 8000 - #Only configure the settings of the AI provider you are using - watsonx.price.prompt.tokens.per.kilo: 0.0 - watsonx.price.complete.tokens.per.kilo: 0.0 - openai.price.prompt.tokens.per.kilo: 0.0 - openai.price.complete.tokens.per.kilo: 0.0 - anthropic.price.prompt.tokens.per.kilo: 0.0 - anthropic.price.complete.tokens.per.kilo: 0.0 - bedrock.price.prompt.tokens.per.kilo: 0.0 - bedrock.price.complete.tokens.per.kilo: 0.0 + +# Customize pricing only for models in use in config/prices.properties diff --git a/llm/config/prices.properties b/llm/config/prices.properties new file mode 100644 index 0000000..63d169c --- /dev/null +++ b/llm/config/prices.properties @@ -0,0 +1,114 @@ +################################################################ +# +# Customize more price items by the following format: +# <aiSystem>.<modelId>.input=0.0 +# <aiSystem>.<modelId>.output=0.0 +# The <modelId> can be set to '*' to match all modelIds within the aiSystem, +# but this has a lower priority than items with a specific modelId specified. 
+# +################################################################ + + +# Price per 1000 tokens for IBM watsonx +# The '*' is to match all modelIds of watsonx +watsonx.*.input=0.0 +watsonx.*.output=0.0 + +# Price per 1000 tokens for OpenAI +# The '*' is to match all modelIds of openai +openai.*.input=0.0 +openai.*.output=0.0 + +# Price per 1000 tokens for Anthropic +# The '*' is to match all modelIds of anthropic +anthropic.*.input=0.0 +anthropic.*.output=0.0 + +# Price per 1000 tokens for Langchain +# The '*' is to match all modelIds of Langchain +langchain.*.input=0.0 +langchain.*.output=0.0 +langchain.ibm/granite-13b-chat-v2.input=0.0 +langchain.ibm/granite-13b-chat-v2.output=0.0 +langchain.ibm/granite-13b-instruct-v2.input=0.0 +langchain.ibm/granite-13b-instruct-v2.output=0.0 + +# Price per 1000 tokens for Bedrock modelIds +bedrock.ai21.jamba-instruct-v1:0.input=0.0 +bedrock.ai21.jamba-instruct-v1:0.output=0.0 +bedrock.ai21.j2-mid-v1.input=0.0 +bedrock.ai21.j2-mid-v1.output=0.0 +bedrock.ai21.j2-ultra-v1.input=0.0 +bedrock.ai21.j2-ultra-v1.output=0.0 +bedrock.ai21.jamba-1-5-large-v1:0.input=0.0 +bedrock.ai21.jamba-1-5-large-v1:0.output=0.0 +bedrock.ai21.jamba-1-5-mini-v1:0.input=0.0 +bedrock.ai21.jamba-1-5-mini-v1:0.output=0.0 +bedrock.amazon.titan-text-express-v1.input=0.0 +bedrock.amazon.titan-text-express-v1.output=0.0 +bedrock.amazon.titan-text-lite-v1.input=0.0 +bedrock.amazon.titan-text-lite-v1.output=0.0 +bedrock.amazon.titan-text-premier-v1:0.input=0.0 +bedrock.amazon.titan-text-premier-v1:0.output=0.0 +bedrock.amazon.titan-embed-text-v1.input=0.0 +bedrock.amazon.titan-embed-text-v1.output=0.0 +bedrock.amazon.titan-embed-text-v2:0.input=0.0 +bedrock.amazon.titan-embed-text-v2:0.output=0.0 +bedrock.amazon.titan-embed-image-v1.input=0.0 +bedrock.amazon.titan-embed-image-v1.output=0.0 +bedrock.amazon.titan-image-generator-v1.input=0.0 +bedrock.amazon.titan-image-generator-v1.output=0.0 +bedrock.amazon.titan-image-generator-v2:0.input=0.0 
+bedrock.amazon.titan-image-generator-v2:0.output=0.0 +bedrock.anthropic.claude-v2.input=0.0 +bedrock.anthropic.claude-v2.output=0.0 +bedrock.anthropic.claude-v2:1.input=0.0 +bedrock.anthropic.claude-v2:1.output=0.0 +bedrock.anthropic.claude-3-sonnet-20240229-v1:0.input=0.0 +bedrock.anthropic.claude-3-sonnet-20240229-v1:0.output=0.0 +bedrock.anthropic.claude-3-5-sonnet-20240620-v1:0.input=0.0 +bedrock.anthropic.claude-3-5-sonnet-20240620-v1:0.output=0.0 +bedrock.anthropic.claude-3-5-sonnet-20241022-v2:0.input=0.0 +bedrock.anthropic.claude-3-5-sonnet-20241022-v2:0.output=0.0 +bedrock.anthropic.claude-3-haiku-20240307-v1:0.input=0.0 +bedrock.anthropic.claude-3-haiku-20240307-v1:0.output=0.0 +bedrock.anthropic.claude-3-5-haiku-20241022-v1:0.input=0.0 +bedrock.anthropic.claude-3-5-haiku-20241022-v1:0.output=0.0 +bedrock.anthropic.claude-3-opus-20240229-v1:0.input=0.0 +bedrock.anthropic.claude-3-opus-20240229-v1:0.output=0.0 +bedrock.anthropic.claude-instant-v1.input=0.0 +bedrock.anthropic.claude-instant-v1.output=0.0 +bedrock.cohere.command-text-v14.input=0.0 +bedrock.cohere.command-text-v14.output=0.0 +bedrock.cohere.command-light-text-v14.input=0.0 +bedrock.cohere.command-light-text-v14.output=0.0 +bedrock.cohere.command-r-v1:0.input=0.0 +bedrock.cohere.command-r-v1:0.output=0.0 +bedrock.cohere.command-r-plus-v1:0.input=0.0 +bedrock.cohere.command-r-plus-v1:0.output=0.0 +bedrock.cohere.embed-english-v3.input=0.0 +bedrock.cohere.embed-english-v3.output=0.0 +bedrock.cohere.embed-multilingual-v3.input=0.0 +bedrock.cohere.embed-multilingual-v3.output=0.0 +bedrock.meta.llama2-13b-chat-v1.input=0.0 +bedrock.meta.llama2-13b-chat-v1.output=0.0 +bedrock.meta.llama2-70b-chat-v1.input=0.0 +bedrock.meta.llama2-70b-chat-v1.output=0.0 +bedrock.meta.llama3-8b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-8b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-70b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-70b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-1-8b-instruct-v1:0.input=0.0 
+bedrock.meta.llama3-1-8b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-1-70b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-1-70b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-1-405b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-1-405b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-2-1b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-2-1b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-2-3b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-2-3b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-2-11b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-2-11b-instruct-v1:0.output=0.0 +bedrock.meta.llama3-2-90b-instruct-v1:0.input=0.0 +bedrock.meta.llama3-2-90b-instruct-v1:0.output=0.0 diff --git a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java index dba4725..7b7f985 100644 --- a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java +++ b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java @@ -13,14 +13,7 @@ public class LLMDcUtil { */ public static final String DEFAULT_INSTRUMENTATION_SCOPE = "instana.sensor-sdk.dc.llm"; public static final String DEFAULT_INSTRUMENTATION_SCOPE_VER = "1.0.0"; - public final static String WATSONX_PRICE_PROMPT_TOKES_PER_KILO = "watsonx.price.prompt.tokens.per.kilo"; - public final static String WATSONX_PRICE_COMPLETE_TOKES_PER_KILO = "watsonx.price.complete.tokens.per.kilo"; - public final static String OPENAI_PRICE_PROMPT_TOKES_PER_KILO = "openai.price.prompt.tokens.per.kilo"; - public final static String OPENAI_PRICE_COMPLETE_TOKES_PER_KILO = "openai.price.complete.tokens.per.kilo"; - public final static String ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO = "anthropic.price.prompt.tokens.per.kilo"; - public final static String ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO = "anthropic.price.complete.tokens.per.kilo"; - public final static String BEDROCK_PRICE_PROMPT_TOKES_PER_KILO = "bedrock.price.prompt.tokens.per.kilo"; - public final static String BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO = 
"bedrock.price.complete.tokens.per.kilo"; + public final static String LLM_PRICES_PROPERTIES = "config/prices.properties"; public final static String SERVICE_LISTEN_PORT = "otel.service.port"; public final static String OTEL_AGENTLESS_MODE = "otel.agentless.mode"; diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java index 4d0d666..c05118a 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java @@ -6,23 +6,18 @@ import static com.instana.dc.DcUtil.CALLBACK_INTERVAL; import static com.instana.dc.DcUtil.POLLING_INTERVAL; -import static com.instana.dc.llm.LLMDcUtil.ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO; -import static com.instana.dc.llm.LLMDcUtil.ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO; -import static com.instana.dc.llm.LLMDcUtil.BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO; -import static com.instana.dc.llm.LLMDcUtil.BEDROCK_PRICE_PROMPT_TOKES_PER_KILO; import static com.instana.dc.llm.LLMDcUtil.LLM_COST_NAME; import static com.instana.dc.llm.LLMDcUtil.LLM_DURATION_MAX_NAME; import static com.instana.dc.llm.LLMDcUtil.LLM_DURATION_NAME; +import static com.instana.dc.llm.LLMDcUtil.LLM_PRICES_PROPERTIES; import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_NAME; import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_NAME; import static com.instana.dc.llm.LLMDcUtil.LLM_TOKEN_NAME; -import static com.instana.dc.llm.LLMDcUtil.OPENAI_PRICE_COMPLETE_TOKES_PER_KILO; -import static com.instana.dc.llm.LLMDcUtil.OPENAI_PRICE_PROMPT_TOKES_PER_KILO; import static com.instana.dc.llm.LLMDcUtil.OTEL_AGENTLESS_MODE; import static com.instana.dc.llm.LLMDcUtil.SERVICE_LISTEN_PORT; -import static com.instana.dc.llm.LLMDcUtil.WATSONX_PRICE_COMPLETE_TOKES_PER_KILO; -import static com.instana.dc.llm.LLMDcUtil.WATSONX_PRICE_PROMPT_TOKES_PER_KILO; +import java.io.BufferedReader; +import java.io.FileReader; import java.util.HashMap; import 
java.util.List; import java.util.Map; @@ -48,14 +43,7 @@ public class LLMDc extends AbstractLLMDc { private Boolean otelAgentlessMode = Boolean.FALSE; private Integer callbackInterval = DEFAULT_LLM_CLBK_INTERVAL; private Integer otelPollInterval = DEFAULT_LLM_POLL_INTERVAL; - private Double watsonxPricePromptTokens = 0.0; - private Double watsonxPriceCompleteTokens = 0.0; - private Double openaiPricePromptTokens = 0.0; - private Double openaiPriceCompleteTokens = 0.0; - private Double anthropicPricePromptTokens = 0.0; - private Double anthropicPriceCompleteTokens = 0.0; - private Double bedrockPricePromptTokens = 0.0; - private Double bedrockPriceCompleteTokens = 0.0; + private HashMap<String, Double> llmTokenPrices = new HashMap<>(); private int listenPort = 0; /** @@ -126,19 +114,37 @@ public LLMDc(Map<String, Object> properties, CustomDcConfig cdcConfig) throws Ex otelAgentlessMode = (Boolean) properties.getOrDefault(OTEL_AGENTLESS_MODE, Boolean.FALSE); callbackInterval = (Integer) properties.getOrDefault(CALLBACK_INTERVAL, DEFAULT_LLM_CLBK_INTERVAL); otelPollInterval = (Integer) properties.getOrDefault(POLLING_INTERVAL, callbackInterval); - watsonxPricePromptTokens = (Double) properties.getOrDefault(WATSONX_PRICE_PROMPT_TOKES_PER_KILO, 0.0); - watsonxPriceCompleteTokens = (Double) properties.getOrDefault(WATSONX_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); - openaiPricePromptTokens = (Double) properties.getOrDefault(OPENAI_PRICE_PROMPT_TOKES_PER_KILO, 0.0); - openaiPriceCompleteTokens = (Double) properties.getOrDefault(OPENAI_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); - anthropicPricePromptTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO, 0.0); - anthropicPriceCompleteTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); - bedrockPricePromptTokens = (Double) properties.getOrDefault(BEDROCK_PRICE_PROMPT_TOKES_PER_KILO, 0.0); - bedrockPriceCompleteTokens = (Double) properties.getOrDefault(BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO, 0.0); listenPort 
= (int) properties.getOrDefault(SERVICE_LISTEN_PORT, 8000); + + String pricePropFile = LLM_PRICES_PROPERTIES; + try (BufferedReader reader = new BufferedReader(new FileReader(pricePropFile))) { + String line; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + int equalIndex = line.indexOf('='); + if (equalIndex > 0) { + String key = line.substring(0, equalIndex).trim().toLowerCase(); + String value = line.substring(equalIndex + 1).trim(); + try { + Double dValue = Double.parseDouble(value); + llmTokenPrices.put(key, dValue); + } catch (Exception e) { + llmTokenPrices.put(key, 0.0); + logger.warning(value + " cannot be parsed to Double: " + e.getMessage()); + } + } + } + } catch (Exception e) { + logger.severe("Cannot load price properties file: " + e.getMessage()); + } } @Override public void initOnce() throws ClassNotFoundException { + var server = Server.builder() .http(listenPort) .service( @@ -225,8 +231,24 @@ public void collectData() { } int divisor = otelAgentlessMode? 1:otelPollInterval; - double priceInputTokens = getPricePromptTokens(aiSystem); - double priceOutputTokens = getPriceCompleteTokens(aiSystem); + String inputPriceKey = aiSystem + "." + modelId + ".input"; + Double priceInputTokens = llmTokenPrices.get(inputPriceKey.toLowerCase()); + if(priceInputTokens == null) { + String inputFlatPriceKey = aiSystem + ".*.input"; + priceInputTokens = llmTokenPrices.get(inputFlatPriceKey.toLowerCase()); + if (priceInputTokens == null) { + priceInputTokens = 0.0; + } + } + String outputPriceKey = aiSystem + "." 
+ modelId + ".output"; + Double priceOutputTokens = llmTokenPrices.get(outputPriceKey.toLowerCase()); + if(priceOutputTokens == null) { + String outputFlatPriceKey = aiSystem + ".*.output"; + priceOutputTokens = llmTokenPrices.get(outputFlatPriceKey.toLowerCase()); + if (priceOutputTokens == null) { + priceOutputTokens = 0.0; + } + } double intervalReqCount = (double)deltaRequestCount/divisor; double intervalInputTokens = (double)deltaInputTokens/divisor; @@ -265,24 +287,4 @@ public void collectData() { } logger.info("-----------------------------------------"); } - - private double getPricePromptTokens(String aiSystem) { - switch (aiSystem) { - case "watsonx": return watsonxPricePromptTokens; - case "openai": return openaiPricePromptTokens; - case "anthropic": return anthropicPricePromptTokens; - case "bedrock": return bedrockPricePromptTokens; - default: return 0.0; - } - } - - private double getPriceCompleteTokens(String aiSystem) { - switch (aiSystem) { - case "watsonx": return watsonxPriceCompleteTokens; - case "openai": return openaiPriceCompleteTokens; - case "anthropic": return anthropicPriceCompleteTokens; - case "bedrock": return bedrockPriceCompleteTokens; - default: return 0.0; - } - } } diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java index b39c4ea..53dd3ca 100644 --- a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java +++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java @@ -269,6 +269,7 @@ private void processTokenMetric(Metric metric, String serviceName) { String modelId = ""; String tokenType = ""; String aiSystem = ""; + String aiVendor = ""; for (KeyValue kv : kvList) { if (kv.getKey().compareTo("gen_ai.response.model") == 0) { modelId = kv.getValue().getStringValue(); @@ -276,11 +277,17 @@ private void processTokenMetric(Metric metric, String serviceName) { } else if 
(kv.getKey().compareTo("gen_ai.system") == 0) { aiSystem = kv.getValue().getStringValue(); System.out.println("Recv Metric --- AI System: " + aiSystem); + } else if (kv.getKey().compareTo("vendor") == 0) { + aiVendor = kv.getValue().getStringValue(); + System.out.println("Recv Metric --- AI vendor: " + aiVendor); } else if (kv.getKey().compareTo("gen_ai.token.type") == 0) { tokenType = kv.getValue().getStringValue(); System.out.println("Recv Metric --- Token Type: " + tokenType); } } + if (!aiVendor.isEmpty() && !modelId.isEmpty()) { + modelId = aiVendor + "." + modelId; + } if (!modelId.isEmpty()) { double tokenSum = dataPoint.getSum(); long requestCount = dataPoint.getCount(); @@ -331,6 +338,7 @@ private void processDurationMetric(Metric metric, String serviceName) { List<KeyValue> kvList = dataPoint.getAttributesList(); String modelId = ""; String aiSystem = ""; + String aiVendor = ""; // for Bedrock for (KeyValue kv : kvList) { if (kv.getKey().compareTo("gen_ai.response.model") == 0) { modelId = kv.getValue().getStringValue(); @@ -338,8 +346,14 @@ private void processDurationMetric(Metric metric, String serviceName) { } else if (kv.getKey().compareTo("gen_ai.system") == 0) { aiSystem = kv.getValue().getStringValue(); System.out.println("Recv Metric --- AI System: " + aiSystem); + } else if (kv.getKey().compareTo("vendor") == 0) { + aiVendor = kv.getValue().getStringValue(); + System.out.println("Recv Metric --- AI vendor: " + aiVendor); } } + if (!aiVendor.isEmpty() && !modelId.isEmpty()) { + modelId = aiVendor + "." + modelId; + } if (!modelId.isEmpty()) { double durationSum = dataPoint.getSum(); long requestCount = dataPoint.getCount();