Skip to content

Commit

Permalink
Custom the token price per model (#70)
Browse files Browse the repository at this point in the history
* Custom the token price per model

* Always use lower case for key

* Add the price items for Langchain

* Add vendor name as the part of modelId

* Update the version to v1.0.5
  • Loading branch information
jinsongo authored Nov 12, 2024
1 parent 2568a1a commit 575b23f
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 71 deletions.
28 changes: 21 additions & 7 deletions llm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,38 @@ java -version

1) Download the installation package:
```bash
curl -O https://github.com/instana/otel-dc/releases/download/Release/otel-dc-llm_1.0.0_linux_amd64.tar.gz
curl -LO https://github.com/instana/otel-dc/releases/download/v1.0.5/otel-dc-llm-1.0.5.tar
```
2) Extract the package to the desired deployment location:
```bash
tar vxf otel-dc-llm_1.0.0_linux_amd64.tar.gz
tar vxf otel-dc-llm-1.0.5.tar
```

## Configuration

### Configure otel dc
```bash
cd otel-dc-llm-1.0.0
cd otel-dc-llm-1.0.5
vi config/config.yaml
```
The following options are required:
- `otel.backend.url`:The OTel gRPC address of the OTel backends, for example Instana Agent (as OTel Backend): http://localhost:4317
- `otel.service.name`:The Data Collector name, which can be any string you choose.
- `*.price.prompt.tokens.per.kilo`:The unit price per thousand prompt tokens.
- `*.price.complete.tokens.per.kilo`:The unit price per thousand complete tokens.
- `otel.agentless.mode`: The connection mode of the OTel data connector; the default mode is agentless.
- `otel.backend.url`: The gRPC endpoint of the Instana backend or the Instana agent, depending on whether agentless mode is used.
- `callback.interval`: The time interval, in seconds, at which data is posted to the backend or agent.
- `otel.service.name`: The Data Collector name, which can be any string that you choose.
- `otel.service.port`: The port on which the Data Collector listens for metrics data from the instrumented applications; the default port is 8000.


### Configure model price
```bash
vi config/prices.properties
```
Add more price items using the following format:
```
<aiSystem>.<modelId>.input=0.0
<aiSystem>.<modelId>.output=0.0
```
The `<modelId>` can be set to `*` to match all modelIds within the aiSystem, but such a wildcard item has a lower priority than an item that names a specific modelId.

## Run ODCL
Run the Data Collector with the following command according to your current system:
Expand Down
2 changes: 1 addition & 1 deletion llm/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ plugins {
}

group = "com.instana.dc"
version = "1.0.2"
version = "1.0.5"
sourceCompatibility = 11
targetCompatibility = 11

Expand Down
11 changes: 2 additions & 9 deletions llm/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,5 @@ instances:
callback.interval: 10
otel.service.name: DC1
otel.service.port: 8000
#Only configure the settings of the AI provider you are using
watsonx.price.prompt.tokens.per.kilo: 0.0
watsonx.price.complete.tokens.per.kilo: 0.0
openai.price.prompt.tokens.per.kilo: 0.0
openai.price.complete.tokens.per.kilo: 0.0
anthropic.price.prompt.tokens.per.kilo: 0.0
anthropic.price.complete.tokens.per.kilo: 0.0
bedrock.price.prompt.tokens.per.kilo: 0.0
bedrock.price.complete.tokens.per.kilo: 0.0

# Model prices are configured in config/prices.properties; customize only the models in use.
114 changes: 114 additions & 0 deletions llm/config/prices.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
################################################################
#
# Add more price items using the following format:
# <aiSystem>.<modelId>.input=0.0
# <aiSystem>.<modelId>.output=0.0
# The <modelId> can be set to '*' to match all modelIds within the aiSystem,
# but this has a lower priority than items with a specific modelId specified.
#
################################################################


# Price per 1000 tokens for IBM watsonx
# The '*' is to match all modelIds of watsonx
watsonx.*.input=0.0
watsonx.*.output=0.0

# Price per 1000 tokens for OpenAI
# The '*' is to match all modelIds of openai
openai.*.input=0.0
openai.*.output=0.0

# Price per 1000 tokens for Anthropic
# The '*' is to match all modelIds of anthropic
anthropic.*.input=0.0
anthropic.*.output=0.0

# Price per 1000 tokens for Langchain
# The '*' is to match all modelIds of Langchain
langchain.*.input=0.0
langchain.*.output=0.0
langchain.ibm/granite-13b-chat-v2.input=0.0
langchain.ibm/granite-13b-chat-v2.output=0.0
langchain.ibm/granite-13b-instruct-v2.input=0.0
langchain.ibm/granite-13b-instruct-v2.output=0.0

# Price per 1000 tokens for Bedrock modelIds
bedrock.ai21.jamba-instruct-v1:0.input=0.0
bedrock.ai21.jamba-instruct-v1:0.output=0.0
bedrock.ai21.j2-mid-v1.input=0.0
bedrock.ai21.j2-mid-v1.output=0.0
bedrock.ai21.j2-ultra-v1.input=0.0
bedrock.ai21.j2-ultra-v1.output=0.0
bedrock.ai21.jamba-1-5-large-v1:0.input=0.0
bedrock.ai21.jamba-1-5-large-v1:0.output=0.0
bedrock.ai21.jamba-1-5-mini-v1:0.input=0.0
bedrock.ai21.jamba-1-5-mini-v1:0.output=0.0
bedrock.amazon.titan-text-express-v1.input=0.0
bedrock.amazon.titan-text-express-v1.output=0.0
bedrock.amazon.titan-text-lite-v1.input=0.0
bedrock.amazon.titan-text-lite-v1.output=0.0
bedrock.amazon.titan-text-premier-v1:0.input=0.0
bedrock.amazon.titan-text-premier-v1:0.output=0.0
bedrock.amazon.titan-embed-text-v1.input=0.0
bedrock.amazon.titan-embed-text-v1.output=0.0
bedrock.amazon.titan-embed-text-v2:0.input=0.0
bedrock.amazon.titan-embed-text-v2:0.output=0.0
bedrock.amazon.titan-embed-image-v1.input=0.0
bedrock.amazon.titan-embed-image-v1.output=0.0
bedrock.amazon.titan-image-generator-v1.input=0.0
bedrock.amazon.titan-image-generator-v1.output=0.0
bedrock.amazon.titan-image-generator-v2:0.input=0.0
bedrock.amazon.titan-image-generator-v2:0.output=0.0
bedrock.anthropic.claude-v2.input=0.0
bedrock.anthropic.claude-v2.output=0.0
bedrock.anthropic.claude-v2:1.input=0.0
bedrock.anthropic.claude-v2:1.output=0.0
bedrock.anthropic.claude-3-sonnet-20240229-v1:0.input=0.0
bedrock.anthropic.claude-3-sonnet-20240229-v1:0.output=0.0
bedrock.anthropic.claude-3-5-sonnet-20240620-v1:0.input=0.0
bedrock.anthropic.claude-3-5-sonnet-20240620-v1:0.output=0.0
bedrock.anthropic.claude-3-5-sonnet-20241022-v2:0.input=0.0
bedrock.anthropic.claude-3-5-sonnet-20241022-v2:0.output=0.0
bedrock.anthropic.claude-3-haiku-20240307-v1:0.input=0.0
bedrock.anthropic.claude-3-haiku-20240307-v1:0.output=0.0
bedrock.anthropic.claude-3-5-haiku-20241022-v1:0.input=0.0
bedrock.anthropic.claude-3-5-haiku-20241022-v1:0.output=0.0
bedrock.anthropic.claude-3-opus-20240229-v1:0.input=0.0
bedrock.anthropic.claude-3-opus-20240229-v1:0.output=0.0
bedrock.anthropic.claude-instant-v1.input=0.0
bedrock.anthropic.claude-instant-v1.output=0.0
bedrock.cohere.command-text-v14.input=0.0
bedrock.cohere.command-text-v14.output=0.0
bedrock.cohere.command-light-text-v14.input=0.0
bedrock.cohere.command-light-text-v14.output=0.0
bedrock.cohere.command-r-v1:0.input=0.0
bedrock.cohere.command-r-v1:0.output=0.0
bedrock.cohere.command-r-plus-v1:0.input=0.0
bedrock.cohere.command-r-plus-v1:0.output=0.0
bedrock.cohere.embed-english-v3.input=0.0
bedrock.cohere.embed-english-v3.output=0.0
bedrock.cohere.embed-multilingual-v3.input=0.0
bedrock.cohere.embed-multilingual-v3.output=0.0
bedrock.meta.llama2-13b-chat-v1.input=0.0
bedrock.meta.llama2-13b-chat-v1.output=0.0
bedrock.meta.llama2-70b-chat-v1.input=0.0
bedrock.meta.llama2-70b-chat-v1.output=0.0
bedrock.meta.llama3-8b-instruct-v1:0.input=0.0
bedrock.meta.llama3-8b-instruct-v1:0.output=0.0
bedrock.meta.llama3-70b-instruct-v1:0.input=0.0
bedrock.meta.llama3-70b-instruct-v1:0.output=0.0
bedrock.meta.llama3-1-8b-instruct-v1:0.input=0.0
bedrock.meta.llama3-1-8b-instruct-v1:0.output=0.0
bedrock.meta.llama3-1-70b-instruct-v1:0.input=0.0
bedrock.meta.llama3-1-70b-instruct-v1:0.output=0.0
bedrock.meta.llama3-1-405b-instruct-v1:0.input=0.0
bedrock.meta.llama3-1-405b-instruct-v1:0.output=0.0
bedrock.meta.llama3-2-1b-instruct-v1:0.input=0.0
bedrock.meta.llama3-2-1b-instruct-v1:0.output=0.0
bedrock.meta.llama3-2-3b-instruct-v1:0.input=0.0
bedrock.meta.llama3-2-3b-instruct-v1:0.output=0.0
bedrock.meta.llama3-2-11b-instruct-v1:0.input=0.0
bedrock.meta.llama3-2-11b-instruct-v1:0.output=0.0
bedrock.meta.llama3-2-90b-instruct-v1:0.input=0.0
bedrock.meta.llama3-2-90b-instruct-v1:0.output=0.0
9 changes: 1 addition & 8 deletions llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,7 @@ public class LLMDcUtil {
*/
public static final String DEFAULT_INSTRUMENTATION_SCOPE = "instana.sensor-sdk.dc.llm";
public static final String DEFAULT_INSTRUMENTATION_SCOPE_VER = "1.0.0";
public final static String WATSONX_PRICE_PROMPT_TOKES_PER_KILO = "watsonx.price.prompt.tokens.per.kilo";
public final static String WATSONX_PRICE_COMPLETE_TOKES_PER_KILO = "watsonx.price.complete.tokens.per.kilo";
public final static String OPENAI_PRICE_PROMPT_TOKES_PER_KILO = "openai.price.prompt.tokens.per.kilo";
public final static String OPENAI_PRICE_COMPLETE_TOKES_PER_KILO = "openai.price.complete.tokens.per.kilo";
public final static String ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO = "anthropic.price.prompt.tokens.per.kilo";
public final static String ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO = "anthropic.price.complete.tokens.per.kilo";
public final static String BEDROCK_PRICE_PROMPT_TOKES_PER_KILO = "bedrock.price.prompt.tokens.per.kilo";
public final static String BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO = "bedrock.price.complete.tokens.per.kilo";
public final static String LLM_PRICES_PROPERTIES = "config/prices.properties";
public final static String SERVICE_LISTEN_PORT = "otel.service.port";
public final static String OTEL_AGENTLESS_MODE = "otel.agentless.mode";

Expand Down
94 changes: 48 additions & 46 deletions llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,18 @@

import static com.instana.dc.DcUtil.CALLBACK_INTERVAL;
import static com.instana.dc.DcUtil.POLLING_INTERVAL;
import static com.instana.dc.llm.LLMDcUtil.ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.BEDROCK_PRICE_PROMPT_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.LLM_COST_NAME;
import static com.instana.dc.llm.LLMDcUtil.LLM_DURATION_MAX_NAME;
import static com.instana.dc.llm.LLMDcUtil.LLM_DURATION_NAME;
import static com.instana.dc.llm.LLMDcUtil.LLM_PRICES_PROPERTIES;
import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_NAME;
import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_NAME;
import static com.instana.dc.llm.LLMDcUtil.LLM_TOKEN_NAME;
import static com.instana.dc.llm.LLMDcUtil.OPENAI_PRICE_COMPLETE_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.OPENAI_PRICE_PROMPT_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.OTEL_AGENTLESS_MODE;
import static com.instana.dc.llm.LLMDcUtil.SERVICE_LISTEN_PORT;
import static com.instana.dc.llm.LLMDcUtil.WATSONX_PRICE_COMPLETE_TOKES_PER_KILO;
import static com.instana.dc.llm.LLMDcUtil.WATSONX_PRICE_PROMPT_TOKES_PER_KILO;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -48,14 +43,7 @@ public class LLMDc extends AbstractLLMDc {
private Boolean otelAgentlessMode = Boolean.FALSE;
private Integer callbackInterval = DEFAULT_LLM_CLBK_INTERVAL;
private Integer otelPollInterval = DEFAULT_LLM_POLL_INTERVAL;
private Double watsonxPricePromptTokens = 0.0;
private Double watsonxPriceCompleteTokens = 0.0;
private Double openaiPricePromptTokens = 0.0;
private Double openaiPriceCompleteTokens = 0.0;
private Double anthropicPricePromptTokens = 0.0;
private Double anthropicPriceCompleteTokens = 0.0;
private Double bedrockPricePromptTokens = 0.0;
private Double bedrockPriceCompleteTokens = 0.0;
private HashMap<String, Double> llmTokenPrices = new HashMap<>();
private int listenPort = 0;

/**
Expand Down Expand Up @@ -126,19 +114,37 @@ public LLMDc(Map<String, Object> properties, CustomDcConfig cdcConfig) throws Ex
otelAgentlessMode = (Boolean) properties.getOrDefault(OTEL_AGENTLESS_MODE, Boolean.FALSE);
callbackInterval = (Integer) properties.getOrDefault(CALLBACK_INTERVAL, DEFAULT_LLM_CLBK_INTERVAL);
otelPollInterval = (Integer) properties.getOrDefault(POLLING_INTERVAL, callbackInterval);
watsonxPricePromptTokens = (Double) properties.getOrDefault(WATSONX_PRICE_PROMPT_TOKES_PER_KILO, 0.0);
watsonxPriceCompleteTokens = (Double) properties.getOrDefault(WATSONX_PRICE_COMPLETE_TOKES_PER_KILO, 0.0);
openaiPricePromptTokens = (Double) properties.getOrDefault(OPENAI_PRICE_PROMPT_TOKES_PER_KILO, 0.0);
openaiPriceCompleteTokens = (Double) properties.getOrDefault(OPENAI_PRICE_COMPLETE_TOKES_PER_KILO, 0.0);
anthropicPricePromptTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_PROMPT_TOKES_PER_KILO, 0.0);
anthropicPriceCompleteTokens = (Double) properties.getOrDefault(ANTHROPIC_PRICE_COMPLETE_TOKES_PER_KILO, 0.0);
bedrockPricePromptTokens = (Double) properties.getOrDefault(BEDROCK_PRICE_PROMPT_TOKES_PER_KILO, 0.0);
bedrockPriceCompleteTokens = (Double) properties.getOrDefault(BEDROCK_PRICE_COMPLETE_TOKES_PER_KILO, 0.0);
listenPort = (int) properties.getOrDefault(SERVICE_LISTEN_PORT, 8000);

String pricePropFile = LLM_PRICES_PROPERTIES;
try (BufferedReader reader = new BufferedReader(new FileReader(pricePropFile))) {
String line;
while ((line = reader.readLine()) != null) {
line = line.trim();
if (line.isEmpty() || line.startsWith("#")) {
continue;
}
int equalIndex = line.indexOf('=');
if (equalIndex > 0) {
String key = line.substring(0, equalIndex).trim().toLowerCase();
String value = line.substring(equalIndex + 1).trim();
try {
Double dValue = Double.parseDouble(value);
llmTokenPrices.put(key, dValue);
} catch (Exception e) {
llmTokenPrices.put(key, 0.0);
logger.warning(value + " cannot be parsed to Double: " + e.getMessage());
}
}
}
} catch (Exception e) {
logger.severe("Cannot load price properties file: " + e.getMessage());
}
}

@Override
public void initOnce() throws ClassNotFoundException {

var server = Server.builder()
.http(listenPort)
.service(
Expand Down Expand Up @@ -225,8 +231,24 @@ public void collectData() {
}
int divisor = otelAgentlessMode? 1:otelPollInterval;

double priceInputTokens = getPricePromptTokens(aiSystem);
double priceOutputTokens = getPriceCompleteTokens(aiSystem);
String inputPriceKey = aiSystem + "." + modelId + ".input";
Double priceInputTokens = llmTokenPrices.get(inputPriceKey.toLowerCase());
if(priceInputTokens == null) {
String inputFlatPriceKey = aiSystem + ".*.input";
priceInputTokens = llmTokenPrices.get(inputFlatPriceKey.toLowerCase());
if (priceInputTokens == null) {
priceInputTokens = 0.0;
}
}
String outputPriceKey = aiSystem + "." + modelId + ".output";
Double priceOutputTokens = llmTokenPrices.get(outputPriceKey.toLowerCase());
if(priceOutputTokens == null) {
String outputFlatPriceKey = aiSystem + ".*.output";
priceOutputTokens = llmTokenPrices.get(outputFlatPriceKey.toLowerCase());
if (priceOutputTokens == null) {
priceOutputTokens = 0.0;
}
}

double intervalReqCount = (double)deltaRequestCount/divisor;
double intervalInputTokens = (double)deltaInputTokens/divisor;
Expand Down Expand Up @@ -265,24 +287,4 @@ public void collectData() {
}
logger.info("-----------------------------------------");
}

/**
 * Returns the prompt (input) token price held for the given AI system.
 *
 * @param aiSystem AI system identifier; must not be null
 * @return the price stored for "watsonx", "openai", "anthropic" or "bedrock",
 *         or 0.0 for any other identifier
 */
private double getPricePromptTokens(String aiSystem) {
    // Calling equals on the argument keeps the NullPointerException a String switch raises for null.
    if (aiSystem.equals("watsonx")) {
        return watsonxPricePromptTokens;
    }
    if (aiSystem.equals("openai")) {
        return openaiPricePromptTokens;
    }
    if (aiSystem.equals("anthropic")) {
        return anthropicPricePromptTokens;
    }
    if (aiSystem.equals("bedrock")) {
        return bedrockPricePromptTokens;
    }
    // Unknown AI systems carry no price.
    return 0.0;
}

/**
 * Returns the completion (output) token price held for the given AI system.
 *
 * @param aiSystem AI system identifier; must not be null
 * @return the price stored for "watsonx", "openai", "anthropic" or "bedrock",
 *         or 0.0 for any other identifier
 */
private double getPriceCompleteTokens(String aiSystem) {
    // Calling equals on the argument keeps the NullPointerException a String switch raises for null.
    if (aiSystem.equals("watsonx")) {
        return watsonxPriceCompleteTokens;
    }
    if (aiSystem.equals("openai")) {
        return openaiPriceCompleteTokens;
    }
    if (aiSystem.equals("anthropic")) {
        return anthropicPriceCompleteTokens;
    }
    if (aiSystem.equals("bedrock")) {
        return bedrockPriceCompleteTokens;
    }
    // Unknown AI systems carry no price.
    return 0.0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,25 @@ private void processTokenMetric(Metric metric, String serviceName) {
String modelId = "";
String tokenType = "";
String aiSystem = "";
String aiVendor = "";
for (KeyValue kv : kvList) {
if (kv.getKey().compareTo("gen_ai.response.model") == 0) {
modelId = kv.getValue().getStringValue();
System.out.println("Recv Metric --- Model ID: " + modelId);
} else if (kv.getKey().compareTo("gen_ai.system") == 0) {
aiSystem = kv.getValue().getStringValue();
System.out.println("Recv Metric --- AI System: " + aiSystem);
} else if (kv.getKey().compareTo("vendor") == 0) {
aiVendor = kv.getValue().getStringValue();
System.out.println("Recv Metric --- AI vendor: " + aiVendor);
} else if (kv.getKey().compareTo("gen_ai.token.type") == 0) {
tokenType = kv.getValue().getStringValue();
System.out.println("Recv Metric --- Token Type: " + tokenType);
}
}
if (!aiVendor.isEmpty() && !modelId.isEmpty()) {
modelId = aiVendor + "." + modelId;
}
if (!modelId.isEmpty()) {
double tokenSum = dataPoint.getSum();
long requestCount = dataPoint.getCount();
Expand Down Expand Up @@ -331,15 +338,22 @@ private void processDurationMetric(Metric metric, String serviceName) {
List<KeyValue> kvList = dataPoint.getAttributesList();
String modelId = "";
String aiSystem = "";
String aiVendor = ""; // for Bedrock
for (KeyValue kv : kvList) {
if (kv.getKey().compareTo("gen_ai.response.model") == 0) {
modelId = kv.getValue().getStringValue();
System.out.println("Recv Metric --- Model ID: " + modelId);
} else if (kv.getKey().compareTo("gen_ai.system") == 0) {
aiSystem = kv.getValue().getStringValue();
System.out.println("Recv Metric --- AI System: " + aiSystem);
} else if (kv.getKey().compareTo("vendor") == 0) {
aiVendor = kv.getValue().getStringValue();
System.out.println("Recv Metric --- AI vendor: " + aiVendor);
}
}
if (!aiVendor.isEmpty() && !modelId.isEmpty()) {
modelId = aiVendor + "." + modelId;
}
if (!modelId.isEmpty()) {
double durationSum = dataPoint.getSum();
long requestCount = dataPoint.getCount();
Expand Down

0 comments on commit 575b23f

Please sign in to comment.