diff --git a/ksmt-neurosmt/build.gradle.kts b/ksmt-neurosmt/build.gradle.kts
new file mode 100644
index 000000000..19b0fd55c
--- /dev/null
+++ b/ksmt-neurosmt/build.gradle.kts
@@ -0,0 +1,39 @@
+plugins {
+    id("io.ksmt.ksmt-base")
+}
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    implementation(project(":ksmt-core"))
+    implementation(project("utils"))
+
+    implementation("com.microsoft.onnxruntime:onnxruntime:1.15.1")
+    implementation("com.github.ajalt.clikt:clikt:3.5.2")
+}
+
+tasks {
+    val standaloneModelRunnerFatJar = register<Jar>("standaloneModelRunnerFatJar") {
+        dependsOn.addAll(listOf("compileJava", "compileKotlin", "processResources"))
+
+        archiveFileName.set("standalone-model-runner.jar")
+        destinationDirectory.set(File("."))
+        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
+
+        manifest {
+            attributes(mapOf("Main-Class" to "io.ksmt.solver.neurosmt.runtime.standalone.StandaloneModelRunnerKt"))
+        }
+
+        val sourcesMain = sourceSets.main.get()
+        val contents = configurations.runtimeClasspath.get()
+            .map { if (it.isDirectory) it else zipTree(it) } + sourcesMain.output
+
+        from(contents)
+    }
+
+    build {
+        dependsOn(standaloneModelRunnerFatJar)
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolver.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolver.kt
new file mode 100644
index 000000000..d5e943021
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolver.kt
@@ -0,0 +1,87 @@
+package io.ksmt.solver.neurosmt
+
+import io.ksmt.KContext
+import io.ksmt.expr.KExpr
+import io.ksmt.solver.KModel
+import io.ksmt.solver.KSolver
+import io.ksmt.solver.KSolverStatus
+import io.ksmt.solver.neurosmt.runtime.NeuroSMTModelRunner
+import io.ksmt.sort.KBoolSort
+import kotlin.time.Duration
+
+class KNeuroSMTSolver(
+    private val ctx: KContext,
+    ordinalsPath: String, embeddingPath: String, convPath: String, decoderPath: String,
+    private val threshold: Double = 0.5
+) : KSolver<KNeuroSMTSolverConfiguration> {
+
+    private val modelRunner = NeuroSMTModelRunner(ctx, ordinalsPath, embeddingPath, convPath, decoderPath)
+    private val asserts = mutableListOf<MutableList<KExpr<KBoolSort>>>(mutableListOf())
+
+    override fun configure(configurator: KNeuroSMTSolverConfiguration.() -> Unit) {
+        TODO("Not yet implemented")
+    }
+
+    override fun assert(expr: KExpr<KBoolSort>) {
+        asserts.last().add(expr)
+    }
+
+    override fun assertAndTrack(expr: KExpr<KBoolSort>) {
+        assert(expr)
+    }
+
+    override fun push() {
+        asserts.add(mutableListOf())
+    }
+
+    override fun pop(n: UInt) {
+        repeat(n.toInt()) {
+            asserts.removeLast()
+        }
+    }
+
+    override fun check(timeout: Duration): KSolverStatus {
+        val prob = with(ctx) {
+            modelRunner.run(mkAnd(asserts.flatten()))
+        }
+
+        return if (prob > threshold) {
+            KSolverStatus.SAT
+        } else {
+            KSolverStatus.UNSAT
+        }
+    }
+
+    override fun checkWithAssumptions(assumptions: List<KExpr<KBoolSort>>, timeout: Duration): KSolverStatus {
+        val prob = with(ctx) {
+            modelRunner.run(mkAnd(asserts.flatten() + assumptions))
+        }
+
+        return if (prob > threshold) {
+            KSolverStatus.SAT
+        } else {
+            KSolverStatus.UNSAT
+        }
+    }
+
+    override fun model(): KModel {
+        TODO("Not yet implemented")
+    }
+
+    override fun unsatCore(): List<KExpr<KBoolSort>> {
+        TODO("Not yet implemented")
+    }
+
+    override fun reasonOfUnknown(): String {
+        TODO("Not yet implemented")
+    }
+
+    override fun interrupt() {
+        TODO("Not yet implemented")
+    }
+
+    override fun close() {
+        modelRunner.close()
+        asserts.clear()
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolverConfiguration.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolverConfiguration.kt
new file mode 100644
index 000000000..8bf4ba534
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/KNeuroSMTSolverConfiguration.kt
@@ -0,0 +1,44 @@
+package io.ksmt.solver.neurosmt
+
+import io.ksmt.solver.KSolverConfiguration
+
+interface KNeuroSMTSolverConfiguration : KSolverConfiguration {
+    fun setOption(option: String, value: Boolean)
+    fun setOption(option: String, value: Int)
+    fun setOption(option: String, value: Double)
+    fun setOption(option: String, value: String)
+
+    override fun setBoolParameter(param: String, value: Boolean) {
+        setOption(param, value)
+    }
+
+    override fun setIntParameter(param: String, value: Int) {
+        setOption(param, value)
+    }
+
+    override fun setDoubleParameter(param: String, value: Double) {
+        setOption(param, value)
+    }
+
+    override fun setStringParameter(param: String, value: String) {
+        setOption(param, value)
+    }
+}
+
+class KNeuroSMTSolverConfigurationImpl(private val params: Any?) : KNeuroSMTSolverConfiguration {
+    override fun setOption(option: String, value: Boolean) {
+        TODO("Not yet implemented")
+    }
+
+    override fun setOption(option: String, value: Int) {
+        TODO("Not yet implemented")
+    }
+
+    override fun setOption(option: String, value: Double) {
+        TODO("Not yet implemented")
+    }
+
+    override fun setOption(option: String, value: String) {
+        TODO("Not yet implemented")
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ExprEncoder.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ExprEncoder.kt
new file mode 100644
index 000000000..c7cf9798b
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ExprEncoder.kt
@@ -0,0 +1,126 @@
+package io.ksmt.solver.neurosmt.runtime
+
+import ai.onnxruntime.OnnxTensor
+import ai.onnxruntime.OrtEnvironment
+import io.ksmt.KContext
+import io.ksmt.expr.KApp
+import io.ksmt.expr.KConst
+import io.ksmt.expr.KExpr
+import io.ksmt.expr.KInterpretedValue
+import io.ksmt.expr.transformer.KNonRecursiveTransformer
+import io.ksmt.sort.*
+import java.nio.FloatBuffer
+import java.nio.IntBuffer
+import java.nio.LongBuffer
+import java.util.*
+
+// expression encoder
+// walks on an expression and calculates state for each node
+// based on states for its children (which are already calculated at that moment)
+class ExprEncoder(
+    override val ctx: KContext,
+    val env: OrtEnvironment,
+    val ordinalEncoder: OrdinalEncoder,
+    val embeddingLayer: ONNXModel,
+    val convLayer: ONNXModel
+) : KNonRecursiveTransformer(ctx) {
+
+    private val exprToState = IdentityHashMap<KExpr<*>, OnnxTensor>()
+
+    fun encodeExpr(expr: KExpr<*>): OnnxTensor {
+        apply(expr)
+
+        return exprToState[expr] ?: error("expression state wasn't calculated yet")
+    }
+
+    override fun <T : KSort> transformApp(expr: KApp<T, *>): KExpr<T> {
+        val state = when (expr) {
+            is KConst<*> -> calcSymbolicVariableState(expr)
+            is KInterpretedValue<*> -> calcValueState(expr)
+            else -> calcAppState(expr)
+        }
+
+        exprToState[expr] = state
+
+        return expr
+    }
+
+    private fun getNodeEmbedding(key: String): OnnxTensor {
+        val nodeLabel = ordinalEncoder.getOrdinal(key)
+        val labelTensor = OnnxTensor.createTensor(
+            env, IntBuffer.allocate(1).put(nodeLabel).rewind(), longArrayOf(1, 1)
+        )
+
+        return embeddingLayer.forward(mapOf("node_labels" to labelTensor))
+    }
+
+    private fun createEdgeTensor(childrenCnt: Int): OnnxTensor {
+        val edges = listOf(
+            List(childrenCnt) { it + 1L },
+            List(childrenCnt) { 0L }
+        )
+
+        val buffer = LongBuffer.allocate(childrenCnt * 2)
+        edges.forEach { row ->
+            row.forEach { node ->
+                buffer.put(node)
+            }
+        }
+        buffer.rewind()
+
+        return OnnxTensor.createTensor(env, buffer, longArrayOf(2, childrenCnt.toLong()))
+    }
+
+    private fun <T : KSort> calcAppState(expr: KApp<T, *>): OnnxTensor {
+        val childrenStates = expr.args.map { exprToState[it] ?: error("expression state wasn't calculated yet") }
+        val childrenCnt = childrenStates.size
+
+        val nodeEmbedding = getNodeEmbedding(expr.decl.name)
+        val embeddingSize = nodeEmbedding.info.shape.reduce { acc, l -> acc * l }
+
+        val buffer = FloatBuffer.allocate((1 + childrenCnt) * embeddingSize.toInt())
+        buffer.put(nodeEmbedding.floatBuffer)
+        childrenStates.forEach {
+            buffer.put(it.floatBuffer)
+        }
+        buffer.rewind()
+
+        val nodeFeatures = OnnxTensor.createTensor(env, buffer, longArrayOf(1L + childrenCnt, embeddingSize))
+        val edges = createEdgeTensor(childrenStates.size)
+        val result = convLayer.forward(mapOf("node_features" to nodeFeatures, "edges" to edges))
+
+        return OnnxTensor.createTensor(env, result.floatBuffer.slice(0, embeddingSize.toInt()), longArrayOf(1L, embeddingSize))
+    }
+
+    private fun <T : KSort> calcSymbolicVariableState(symbol: KConst<T>): OnnxTensor {
+        val sort = symbol.decl.sort
+
+        val key = "SYMBOLIC;" + when (sort) {
+            is KBoolSort -> "Bool"
+            is KBvSort -> "BitVec"
+            is KFpSort -> "FP"
+            is KFpRoundingModeSort -> "FP_RM"
+            is KArraySortBase<*> -> "Array"
+            is KUninterpretedSort -> sort.name
+            else -> error("unknown symbolic sort: ${sort::class.simpleName}")
+        }
+
+        return getNodeEmbedding(key)
+    }
+
+    private fun <T : KSort> calcValueState(value: KInterpretedValue<T>): OnnxTensor {
+        val sort = value.decl.sort
+
+        val key = "VALUE;" + when (sort) {
+            is KBoolSort -> "Bool"
+            is KBvSort -> "BitVec"
+            is KFpSort -> "FP"
+            is KFpRoundingModeSort -> "FP_RM"
+            is KArraySortBase<*> -> "Array"
+            is KUninterpretedSort -> sort.name
+            else -> error("unknown value sort: ${value.decl.sort::class.simpleName}")
+        }
+
+        return getNodeEmbedding(key)
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/NeuroSMTModelRunner.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/NeuroSMTModelRunner.kt
new file mode 100644
index 000000000..31a481c08
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/NeuroSMTModelRunner.kt
@@ -0,0 +1,36 @@
+package io.ksmt.solver.neurosmt.runtime
+
+import ai.onnxruntime.OrtEnvironment
+import io.ksmt.KContext
+import io.ksmt.expr.KExpr
+import kotlin.math.exp
+
+// wrapper for NeuroSMT model
+// runs the whole model pipeline
+class NeuroSMTModelRunner(
+    val ctx: KContext,
+    ordinalsPath: String, embeddingPath: String, convPath: String, decoderPath: String
+) {
+    val env = OrtEnvironment.getEnvironment()
+
+    val ordinalEncoder = OrdinalEncoder(ordinalsPath)
+    val embeddingLayer = ONNXModel(env, embeddingPath)
+    val convLayer = ONNXModel(env, convPath)
+
+    val decoder = ONNXModel(env, decoderPath)
+
+    fun run(expr: KExpr<*>): Float {
+        val encoder = ExprEncoder(ctx, env, ordinalEncoder, embeddingLayer, convLayer)
+        val exprFeatures = encoder.encodeExpr(expr)
+        val result = decoder.forward(mapOf("expr_features" to exprFeatures))
+        val logit = result.floatBuffer[0]
+
+        return 1f / (1f + exp(-logit)) // sigmoid calculation
+    }
+
+    fun close() {
+        embeddingLayer.close()
+        convLayer.close()
+        decoder.close()
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ONNXModel.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ONNXModel.kt
new file mode 100644
index 000000000..928ed65e1
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/ONNXModel.kt
@@ -0,0 +1,18 @@
+package io.ksmt.solver.neurosmt.runtime
+
+import ai.onnxruntime.OnnxTensor
+import ai.onnxruntime.OrtEnvironment
+
+// wrapper for any exported ONNX model
+class ONNXModel(env: OrtEnvironment, modelPath: String) : AutoCloseable {
+    val session = env.createSession(modelPath)
+
+    fun forward(input: Map<String, OnnxTensor>): OnnxTensor {
+        val result = session.run(input)
+        return result.get(0) as OnnxTensor
+    }
+
+    override fun close() {
+        session.close()
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/OrdinalEncoder.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/OrdinalEncoder.kt
new file mode 100644
index 000000000..f3090a8a1
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/OrdinalEncoder.kt
@@ -0,0 +1,23 @@
+package io.ksmt.solver.neurosmt.runtime
+
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.streams.asSequence
+
+const val UNKNOWN_VALUE = 1999
+
+// wrapper for single-feature sklearn OrdinalEncoder (for each string we should provide its ordinal)
+// used to convert strings to integers
+class OrdinalEncoder(ordinalsPath: String, private val unknownValue: Int = UNKNOWN_VALUE) {
+    private val lookup = HashMap<String, Int>()
+
+    init {
+        Files.lines(Path.of(ordinalsPath)).asSequence().forEachIndexed { index, s ->
+            lookup[s] = index
+        }
+    }
+
+    fun getOrdinal(s: String): Int {
+        return lookup[s] ?: unknownValue
+    }
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/CLArgs.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/CLArgs.kt
new file mode 100644
index 000000000..92cd23848
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/CLArgs.kt
@@ -0,0 +1,30 @@
+package io.ksmt.solver.neurosmt.runtime.standalone
+
+import com.github.ajalt.clikt.core.CliktCommand
+import com.github.ajalt.clikt.parameters.options.default
+import com.github.ajalt.clikt.parameters.options.option
+import com.github.ajalt.clikt.parameters.options.required
+import com.github.ajalt.clikt.parameters.types.double
+
+class CLArgs : CliktCommand() {
+    override fun run() = Unit
+
+    val datasetPath: String by
+        option("-D", "--data", help = "path to dataset").required()
+
+    val ordinalsPath: String by
+        option("-o", "--ordinals", help = "path to ordinal encoder categories").required()
+
+    val embeddingsPath: String by
+        option("-e", "--embeddings", help = "path to embeddings layer").required()
+
+    val convPath: String by
+        option("-c", "--conv", help = "path to conv layer").required()
+
+    val decoderPath: String by
+        option("-d", "--decoder", help = "path to decoder").required()
+
+    val threshold: Double by
+        option("-t", "--threshold", help = "probability threshold for sat/unsat decision")
+            .double().default(0.5)
+}
\ No newline at end of file
diff --git a/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/StandaloneModelRunner.kt b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/StandaloneModelRunner.kt
new file mode 100644
index 000000000..4f4622388
--- /dev/null
+++ b/ksmt-neurosmt/src/main/kotlin/io/ksmt/solver/neurosmt/runtime/standalone/StandaloneModelRunner.kt
@@ -0,0 +1,174 @@
+package io.ksmt.solver.neurosmt.runtime.standalone
+
+import com.github.ajalt.clikt.core.NoSuchParameter
+import com.github.ajalt.clikt.core.PrintHelpMessage
+import com.github.ajalt.clikt.core.UsageError
+import io.ksmt.KContext
+import io.ksmt.solver.KSolverStatus
+import io.ksmt.solver.neurosmt.deserialize
+import io.ksmt.solver.neurosmt.runtime.NeuroSMTModelRunner
+import java.io.File
+import java.io.FileInputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.io.path.isRegularFile
+import kotlin.io.path.name
+import kotlin.io.path.pathString
+import kotlin.math.roundToLong
+import kotlin.math.sqrt
+import kotlin.time.ExperimentalTime
+import kotlin.time.measureTimedValue
+
+fun printStats(
+    sat: Int, unsat: Int, skipped: Int,
+    ok: Int, fail: Int,
+    confusionMatrix: Map<Pair<KSolverStatus, KSolverStatus>, Int>
+) {
+    println("$sat sat; $unsat unsat; $skipped skipped")
+    println("$ok ok; $fail failed")
+    println()
+
+    val satToSat = confusionMatrix.getOrDefault(KSolverStatus.SAT to KSolverStatus.SAT, 0)
+    val unsatToUnsat = confusionMatrix.getOrDefault(KSolverStatus.UNSAT to KSolverStatus.UNSAT, 0)
+    val unsatToSat = confusionMatrix.getOrDefault(KSolverStatus.UNSAT to KSolverStatus.SAT, 0)
+    val satToUnsat = confusionMatrix.getOrDefault(KSolverStatus.SAT to KSolverStatus.UNSAT, 0)
+
+    println("target vs output")
+    println("  sat vs   sat : $satToSat")
+    println("unsat vs unsat : $unsatToUnsat")
+    println("unsat vs   sat : $unsatToSat")
+    println("  sat vs unsat : $satToUnsat")
+    println()
+}
+
+fun printModelRunTimeStats(modelRunTimes: List<Long>) {
+    val mean = modelRunTimes.average()
+    val std = sqrt(modelRunTimes.map { it - mean }.map { it * it }.average())
+
+    println("run time:")
+    println("mean: ${mean.roundToLong()}μs; std: ${std.roundToLong()}μs")
+    println()
+}
+
+// standalone tool to run NeuroSMT model in kotlin
+@OptIn(ExperimentalTime::class)
+fun main(args: Array<String>) {
+    val arguments = CLArgs()
+    try {
+        arguments.parse(args)
+    } catch (e: PrintHelpMessage) {
+        println(arguments.getFormattedHelp())
+        return
+    } catch (e: NoSuchParameter) {
+        println(e.message)
+        return
+    } catch (e: UsageError) {
+        println(e.message)
+        return
+    } catch (e: Exception) {
+        println("Error!\n$e")
+        return
+    }
+
+    val ctx = KContext(
+        astManagementMode = KContext.AstManagementMode.NO_GC,
+        simplificationMode = KContext.SimplificationMode.NO_SIMPLIFY
+    )
+
+    val files = Files.walk(Path.of(arguments.datasetPath)).filter { it.isRegularFile() }.toList()
+
+    val runner = NeuroSMTModelRunner(
+        ctx,
+        ordinalsPath = arguments.ordinalsPath,
+        embeddingPath = arguments.embeddingsPath,
+        convPath = arguments.convPath,
+        decoderPath = arguments.decoderPath
+    )
+
+    val modelRunTimes = mutableListOf<Long>()
+
+    var sat = 0; var unsat = 0; var skipped = 0
+    var ok = 0; var fail = 0
+
+    val confusionMatrix = mutableMapOf<Pair<KSolverStatus, KSolverStatus>, Int>()
+
+    files.forEachIndexed sample@{ ind, it ->
+        if (ind % 100 == 0) {
+            println("\n#$ind:")
+            printStats(sat, unsat, skipped, ok, fail, confusionMatrix)
+        }
+
+        val sampleFile = File(it.pathString)
+
+        val assertList = try {
+            deserialize(ctx, FileInputStream(sampleFile))
+        } catch (e: Exception) {
+            skipped++
+            return@sample
+        }
+
+        val answer = when {
+            it.name.endsWith("-sat") -> KSolverStatus.SAT
+            it.name.endsWith("-unsat") -> KSolverStatus.UNSAT
+            else -> KSolverStatus.UNKNOWN
+        }
+
+        if (answer == KSolverStatus.UNKNOWN) {
+            skipped++
+            return@sample
+        }
+
+        val prob = with(ctx) {
+            val
formula = when (assertList.size) { + 0 -> { + skipped++ + return@sample + } + 1 -> { + assertList[0] + } + else -> { + mkAnd(assertList) + } + } + + val timedVal = measureTimedValue { + runner.run(formula) + } + + modelRunTimes.add(timedVal.duration.inWholeMicroseconds) + timedVal.value + } + + val output = if (prob > arguments.threshold) { + KSolverStatus.SAT + } else { + KSolverStatus.UNSAT + } + + when (answer) { + KSolverStatus.SAT -> sat++ + KSolverStatus.UNSAT -> unsat++ + else -> { /* can't happen */ } + } + + if (output == answer) { + ok++ + } else { + fail++ + } + + confusionMatrix.compute(answer to output) { _, v -> + if (v == null) { + 1 + } else { + v + 1 + } + } + } + + println() + printStats(sat, unsat, skipped, ok, fail, confusionMatrix) + + printModelRunTimeStats(modelRunTimes) +} \ No newline at end of file diff --git a/ksmt-neurosmt/src/main/python/Decoder.py b/ksmt-neurosmt/src/main/python/Decoder.py new file mode 100644 index 000000000..7cb9d61e2 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/Decoder.py @@ -0,0 +1,30 @@ +import torch +import torch.nn as nn + +from GlobalConstants import DECODER_LAYERS + + +class Decoder(nn.Module): + def __init__(self, hidden_dim: int): + super().__init__() + + self.act = nn.ReLU() + self.linears = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(DECODER_LAYERS - 1)]) + self.out = nn.Linear(hidden_dim, 1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + decoder forward pass + + :param x: torch.Tensor of shape [batch size, hidden dimension size] (dtype=float) + :return: torch.Tensor of shape [batch size, 1] (dtype=float) -- + each element is a logit for probability of formula to be SAT + """ + + for layer in self.linears: + x = layer(x) + x = self.act(x) + + x = self.out(x) + + return x diff --git a/ksmt-neurosmt/src/main/python/Encoder.py b/ksmt-neurosmt/src/main/python/Encoder.py new file mode 100644 index 000000000..380b774f0 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/Encoder.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + +from torch_geometric.nn import SAGEConv + +from GlobalConstants import EMBEDDINGS_CNT + + +class Encoder(nn.Module): + def __init__(self, hidden_dim: int): + super().__init__() + + self.embedding = nn.Embedding(EMBEDDINGS_CNT, hidden_dim) + self.conv = SAGEConv(hidden_dim, hidden_dim, "mean", root_weight=True, project=True) + + # other options (not supported yet) + # self.conv = GATConv(hidden_dim, hidden_dim, add_self_loops=True) # this can't be exported to ONNX + # self.conv = TransformerConv(hidden_dim, hidden_dim, root_weight=True) # this can't be exported to ONNX + + def forward( + self, node_labels: torch.Tensor, edges: torch.Tensor, depths: torch.Tensor, root_ptrs: torch.Tensor + ) -> torch.Tensor: + """ + encoder forward pass + + :param node_labels: torch.Tensor of shape [number of nodes in batch, 1] (dtype=int32) + :param edges: torch.Tensor of shape [2, number of edges in batch] (dtype=int64) + :param depths: torch.Tensor of shape [number of nodes in batch] (dtype=int32) + :param root_ptrs: torch.Tensor of shape [batch size + 1] (dtype=int32) -- + pointers to root of graph for each expression + :return: torch.Tensor of shape [batch size, hidden dimension size] (dtype=float) -- + embeddings for each expression + """ + + node_features = self.embedding(node_labels.squeeze()) + + depth = depths.max() + for i in range(1, depth + 1): + mask = (depths == i) + new_features = self.conv(node_features, edges) + + node_features = torch.clone(node_features) + 
node_features[mask] = new_features[mask] + + node_features = node_features[root_ptrs[1:] - 1] + + return node_features diff --git a/ksmt-neurosmt/src/main/python/GlobalConstants.py b/ksmt-neurosmt/src/main/python/GlobalConstants.py new file mode 100644 index 000000000..bff231712 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/GlobalConstants.py @@ -0,0 +1,11 @@ +BATCH_SIZE = 1024 +MAX_FORMULA_SIZE = 10000 +MAX_FORMULA_DEPTH = 2500 +NUM_WORKERS = 32 +METADATA_PATH = "__meta" + +EMBEDDINGS_CNT = 2000 +EMBEDDING_DIM = 32 +DECODER_LAYERS = 4 + +MAX_EPOCHS = 200 diff --git a/ksmt-neurosmt/src/main/python/GraphDataloader.py b/ksmt-neurosmt/src/main/python/GraphDataloader.py new file mode 100644 index 000000000..f4672e130 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/GraphDataloader.py @@ -0,0 +1,105 @@ +import os +from typing import Literal + +import numpy as np +import joblib +from tqdm import tqdm + +import torch +from torch.utils.data import Dataset + +from torch_geometric.data import Data as Graph +from torch_geometric.loader import DataLoader + +from GlobalConstants import BATCH_SIZE, MAX_FORMULA_SIZE, MAX_FORMULA_DEPTH, NUM_WORKERS, METADATA_PATH +from GraphReader import read_graph_by_path + + +# torch Dataset +class GraphDataset(Dataset): + def __init__(self, graph_data): + self.graphs = [Graph( + x=torch.tensor(nodes, dtype=torch.int32), + edge_index=torch.tensor(edges, dtype=torch.int64).t(), + y=torch.tensor([[label]], dtype=torch.int32), + depth=torch.tensor(depths, dtype=torch.int32) + ) for nodes, edges, label, depths in graph_data] + + def __len__(self): + return len(self.graphs) + + def __getitem__(self, index): + return self.graphs[index] + + +# load all samples from all datasets and return them as a list of tuples +def load_data(paths_to_datasets: list[str], target: Literal["train", "val", "test"])\ + -> list[tuple[list[str], list[tuple[int, int]], int, list[int]]]: + + data = [] + + print(f"loading {target}") + for path_to_dataset in paths_to_datasets: + print(f"loading data from '{path_to_dataset}'") + + with open(os.path.join(path_to_dataset, METADATA_PATH, target), "r") as f: + for path_to_sample in tqdm(list(f.readlines())): + path_to_sample = os.path.join(path_to_dataset, path_to_sample.strip()) + + operators, edges, depths = read_graph_by_path( + path_to_sample, max_size=MAX_FORMULA_SIZE, max_depth=MAX_FORMULA_DEPTH + ) + + if operators is None or edges is None or depths is None: + continue + + if len(edges) == 0: + print(f"w: ignoring formula without edges; file '{path_to_sample}'") + continue + + if path_to_sample.endswith("-sat"): + label = 1 + elif path_to_sample.endswith("-unsat"): + label = 0 + else: + raise Exception(f"strange file path '{path_to_sample}'") + + data.append((operators, edges, label, depths)) + + return data + + +# load samples from all datasets, transform them and return them in a Dataloader object +def get_dataloader(paths_to_datasets: list[str], target: Literal["train", "val", "test"], path_to_ordinal_encoder: str)\ + -> DataLoader: + + print(f"creating dataloader for {target}") + + print("loading data") + data = load_data(paths_to_datasets, target) + + print(f"stats: {len(data)} overall; sat fraction is {sum(it[2] for it in data) / len(data)}") + + print("loading encoder") + encoder = joblib.load(path_to_ordinal_encoder) + + def transform(data_for_one_sample: tuple[list[str], list[tuple[int, int]], int, list[int]])\ + -> tuple[list[str], list[tuple[int, int]], int, list[int]]: + + nodes, edges, label, depths = data_for_one_sample + nodes = 
encoder.transform(np.array(nodes).reshape(-1, 1)) + + return nodes, edges, label, depths + + print("transforming") + data = list(map(transform, data)) + + print("creating dataset") + ds = GraphDataset(data) + + print("constructing dataloader\n", flush=True) + return DataLoader( + ds.graphs, + batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, + shuffle=(target == "train"), drop_last=(target == "train") + ) diff --git a/ksmt-neurosmt/src/main/python/GraphReader.py b/ksmt-neurosmt/src/main/python/GraphReader.py new file mode 100644 index 000000000..741d7b642 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/GraphReader.py @@ -0,0 +1,79 @@ +from enum import Enum +from typing import Union, TextIO + + +class VertexType(Enum): + SYMBOLIC = 0 + VALUE = 1 + APP = 2 + + +def get_vertex_type(name: str): + if name == "SYMBOLIC": + return VertexType.SYMBOLIC + + if name == "VALUE": + return VertexType.VALUE + + return VertexType.APP + + +def process_line(line: str, cur_vertex: int, operators: list[str], edges: list[tuple[int, int]], depth: list[int]): + name, info = line.split(";") + info = info.strip() + # depth[v] is a length of the longest path from vertex v to any sink in an expression graph + depth.append(0) + + if get_vertex_type(name) == VertexType.APP: + operators.append(name) + + children = map(int, info.split(" ")) + for u in children: + edges.append((u, cur_vertex)) + depth[cur_vertex] = max(depth[cur_vertex], depth[u] + 1) + + else: + operators.append(name + ";" + info) + + +def read_graph_from_file(inf: TextIO, max_size: int, max_depth: int)\ + -> Union[tuple[list[str], list[tuple[int, int]], list[int]], tuple[None, None, None]]: + + operators, edges, depth = [], [], [] + + cur_vertex = 0 + for line in inf.readlines(): + line = line.strip() + + if line.startswith(";"): + continue # lines starting with ";" are considered to be comments + + try: + process_line(line, cur_vertex, operators, edges, depth) + + except Exception as e: + print(e, "\n") + print(inf.name, "\n") + print(cur_vertex, line, "\n") + raise e + + if cur_vertex >= max_size or depth[cur_vertex] > max_depth: + return None, None, None + + cur_vertex += 1 + + return operators, edges, depth + + +def read_graph_by_path(path: str, max_size: int, max_depth: int)\ + -> Union[tuple[list[str], list[tuple[int, int]], list[int]], tuple[None, None, None]]: + + with open(path, "r") as inf: + try: + return read_graph_from_file(inf, max_size, max_depth) + + except Exception as e: + print(e) + print(f"path: '{path}'") + + raise e diff --git a/ksmt-neurosmt/src/main/python/LightningModel.py b/ksmt-neurosmt/src/main/python/LightningModel.py new file mode 100644 index 000000000..88d495bae --- /dev/null +++ b/ksmt-neurosmt/src/main/python/LightningModel.py @@ -0,0 +1,164 @@ +from typing import Literal + +from sklearn.metrics import classification_report + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import pytorch_lightning as pl + +from torchmetrics.classification import BinaryAccuracy, BinaryConfusionMatrix, BinaryAUROC, BinaryPrecisionAtFixedRecall + +from GlobalConstants import EMBEDDING_DIM +from Model import Model + + +def unpack_batch(batch): + node_labels, edges, depths, root_ptrs = batch.x, batch.edge_index, batch.depth, batch.ptr + + return node_labels, edges, depths, root_ptrs + + +class LightningModel(pl.LightningModule): + """ + PyTorch Lightning wrapper for model + """ + + def __init__(self): + super().__init__() + + self.model = Model(hidden_dim=EMBEDDING_DIM) + + self.val_outputs = [] + self.val_targets = 
[] + + # different metrics + self.acc = BinaryAccuracy() + self.confusion_matrix = BinaryConfusionMatrix() + + self.roc_auc = BinaryAUROC() + self.precisions_at_recall = nn.ModuleList([ + BinaryPrecisionAtFixedRecall(rec) for rec in [0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, 1.0] + ]) + + # forward pass is just the same as in the original model + def forward(self, node_labels, edges, depths, root_ptrs): + return self.model(node_labels, edges, depths, root_ptrs) + + def configure_optimizers(self): + params = [p for p in self.model.parameters() if p is not None and p.requires_grad] + optimizer = torch.optim.Adam(params, lr=1e-4) + + return optimizer + + def training_step(self, train_batch, batch_idx): + out = self.model(*unpack_batch(train_batch)) + loss = F.binary_cross_entropy_with_logits(out, train_batch.y.to(torch.float)) + + out = F.sigmoid(out) + + self.log( + "train/loss", loss.detach().float(), + prog_bar=True, logger=True, + on_step=True, on_epoch=True, + batch_size=train_batch.num_graphs + ) + self.log( + "train/acc", self.acc(out, train_batch.y), + prog_bar=True, logger=True, + on_step=False, on_epoch=True, + batch_size=train_batch.num_graphs + ) + + return loss + + def shared_val_test_step(self, batch, batch_idx, target_name: Literal["val", "test"]): + out = self.model(*unpack_batch(batch)) + loss = F.binary_cross_entropy_with_logits(out, batch.y.to(torch.float)) + + out = F.sigmoid(out) + + self.roc_auc.update(out, batch.y) + for precision_at_recall in self.precisions_at_recall: + precision_at_recall.update(out, batch.y) + + self.log( + f"{target_name}/loss", loss.float(), + prog_bar=True, logger=True, + on_step=False, on_epoch=True, + batch_size=batch.num_graphs + ) + self.log( + f"{target_name}/acc", self.acc(out, batch.y), + prog_bar=True, logger=True, + on_step=False, on_epoch=True, + batch_size=batch.num_graphs + ) + + self.val_outputs.append(out) + self.val_targets.append(batch.y) + + return loss + + def shared_val_test_epoch_end(self, target_name: Literal["val", "test"]): + print("\n", flush=True) + + all_outputs = torch.flatten(torch.cat(self.val_outputs)) + all_targets = torch.flatten(torch.cat(self.val_targets)) + + self.val_outputs.clear() + self.val_targets.clear() + + self.print_confusion_matrix_and_classification_report(all_outputs, all_targets) + + self.log( + f"{target_name}/roc-auc", self.roc_auc.compute(), + prog_bar=True, logger=True, + on_step=False, on_epoch=True + ) + self.roc_auc.reset() + + for precision_at_recall in self.precisions_at_recall: + precision = precision_at_recall.compute()[0] + recall = precision_at_recall.min_recall + + self.log( + f"{target_name}/precision_at_{recall}", precision, + prog_bar=False, logger=True, + on_step=False, on_epoch=True + ) + + precision_at_recall.reset() + + def validation_step(self, val_batch, batch_idx): + return self.shared_val_test_step(val_batch, batch_idx, "val") + + def print_confusion_matrix_and_classification_report(self, all_outputs: torch.Tensor, all_targets: torch.Tensor): + conf_mat = self.confusion_matrix(all_outputs, all_targets).detach().cpu().numpy() + + print(" +-------+-----------+-----------+") + print(" ", "|", "unsat", "|", str(conf_mat[0][0]).rjust(9, " "), "|", str(conf_mat[0][1]).rjust(9, " "), "|") + print("targets", "|", " sat", "|", str(conf_mat[1][0]).rjust(9, " "), "|", str(conf_mat[1][1]).rjust(9, " "), "|") + print(" +-------+-----------+-----------+") + print(" ", "|", " ", "|", " unsat ", "|", " sat ", "|") + print(" +-------+-----------+-----------+") + print(" preds", "\n", sep="") 
+ + all_outputs = all_outputs.float().cpu().numpy() + all_targets = all_targets.float().cpu().numpy() + + all_outputs = all_outputs > 0.5 + print( + classification_report(all_targets, all_outputs, target_names=["unsat", "sat"], digits=3, zero_division=0.0), + flush=True + ) + + def on_validation_epoch_end(self): + self.shared_val_test_epoch_end("val") + + def test_step(self, test_batch, batch_idx): + return self.shared_val_test_step(test_batch, batch_idx, "test") + + def on_test_epoch_end(self): + self.shared_val_test_epoch_end("test") diff --git a/ksmt-neurosmt/src/main/python/Model.py b/ksmt-neurosmt/src/main/python/Model.py new file mode 100644 index 000000000..91ff8a525 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/Model.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + +from Encoder import Encoder +from Decoder import Decoder + + +class Model(nn.Module): + def __init__(self, hidden_dim: int): + super().__init__() + + self.encoder = Encoder(hidden_dim=hidden_dim) + self.decoder = Decoder(hidden_dim=hidden_dim) + + def forward( + self, node_labels: torch.Tensor, edges: torch.Tensor, depths: torch.Tensor, root_ptrs: torch.Tensor + ) -> torch.Tensor: + """ + model full forward pass (encoder + decoder) + + :param node_labels: torch.Tensor of shape [number of nodes in batch, 1] (dtype=int32) + :param edges: torch.Tensor of shape [2, number of edges in batch] (dtype=int64) + :param depths: torch.Tensor of shape [number of nodes in batch] (dtype=int32) + :param root_ptrs: torch.Tensor of shape [batch size + 1] (dtype=int32) -- + pointers to root of graph for each expression + :return: torch.Tensor of shape [batch size, 1] (dtype=float) -- + each element is a logit for probability of formula to be SAT + """ + + x = self.encoder(node_labels, edges, depths, root_ptrs) + x = self.decoder(x) + + return x diff --git a/ksmt-neurosmt/src/main/python/check-split-leaks.py b/ksmt-neurosmt/src/main/python/check-split-leaks.py new file mode 100755 index 000000000..141d7e739 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/check-split-leaks.py @@ -0,0 +1,54 @@ +#!/usr/bin/python3 + +import os +from argparse import ArgumentParser +from typing import Literal + +from tqdm import tqdm + +from GlobalConstants import METADATA_PATH + + +def get_groups_set(paths_to_datasets: list[str], target: Literal["train", "val", "test"]) -> set: + groups = set() + + print(f"loading {target}") + for path_to_dataset in paths_to_datasets: + print(f"loading data from '{path_to_dataset}'") + + with open(os.path.join(path_to_dataset, METADATA_PATH, target), "r") as f: + for path_to_sample in tqdm(list(f.readlines())): + path_to_sample = path_to_sample.strip() + path_to_parent = os.path.dirname(path_to_sample) + + groups.add(str(path_to_parent)) + + return groups + + +def get_args(): + parser = ArgumentParser(description="validation script") + parser.add_argument("--ds", required=True, nargs="+") + + args = parser.parse_args() + print("args:") + for arg in vars(args): + print(arg, "=", getattr(args, arg)) + + print() + + return args + + +if __name__ == "__main__": + args = get_args() + + train_groups = get_groups_set(args.ds, "train") + val_groups = get_groups_set(args.ds, "val") + test_groups = get_groups_set(args.ds, "test") + + assert train_groups.isdisjoint(val_groups) + assert val_groups.isdisjoint(test_groups) + assert test_groups.isdisjoint(train_groups) + + print("\nsuccess!") diff --git a/ksmt-neurosmt/src/main/python/create-ordinal-encoder.py b/ksmt-neurosmt/src/main/python/create-ordinal-encoder.py new file 
mode 100755 index 000000000..0ea1a6d3d --- /dev/null +++ b/ksmt-neurosmt/src/main/python/create-ordinal-encoder.py @@ -0,0 +1,59 @@ +#!/usr/bin/python3 + +from argparse import ArgumentParser +import itertools + +import numpy as np +import joblib + +from sklearn.preprocessing import OrdinalEncoder + +from GlobalConstants import EMBEDDINGS_CNT +from GraphDataloader import load_data + + +def create_ordinal_encoder(paths_to_datasets: list[str], path_to_ordinal_encoder: str): + data = load_data(paths_to_datasets, "train") + + encoder = OrdinalEncoder( + dtype=int, + handle_unknown="use_encoded_value", unknown_value=EMBEDDINGS_CNT - 1, + encoded_missing_value=EMBEDDINGS_CNT - 2 + ) + + print("fitting ordinal encoder") + encoder.fit(np.array(list(itertools.chain( + *(list(zip(*data))[0]) + ))).reshape(-1, 1)) + + for cat in encoder.categories_: + if len(cat) > EMBEDDINGS_CNT - 2: + print("w: too many categories") + + print("dumping ordinal encoder") + joblib.dump(encoder, path_to_ordinal_encoder) + + with open(path_to_ordinal_encoder + ".cats", "w") as f: + for sample in encoder.categories_[0]: + f.write(str(sample) + "\n") + + +def get_args(): + parser = ArgumentParser(description="ordinal encoder preparing script") + parser.add_argument("--ds", required=True, nargs="+") + parser.add_argument("--oenc", required=True) + + args = parser.parse_args() + print("args:") + for arg in vars(args): + print(arg, "=", getattr(args, arg)) + + print() + + return args + + +if __name__ == "__main__": + args = get_args() + + create_ordinal_encoder(args.ds, args.oenc) diff --git a/ksmt-neurosmt/src/main/python/export-model.py b/ksmt-neurosmt/src/main/python/export-model.py new file mode 100644 index 000000000..467e37d95 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/export-model.py @@ -0,0 +1,61 @@ +#!/usr/bin/python3 + +import sys + +import torch + +from GlobalConstants import EMBEDDING_DIM +from LightningModel import LightningModel + + +if __name__ == "__main__": + + pl_model = LightningModel().load_from_checkpoint(sys.argv[1], map_location=torch.device("cpu")) + pl_model.eval() + + # input example + node_labels = torch.unsqueeze(torch.arange(0, 9).to(torch.int32), 1) + node_features = pl_model.model.encoder.embedding(node_labels.squeeze()) + edges = torch.tensor([ + [0, 0, 0, 1, 2, 3, 4, 4, 5, 6, 7], + [1, 2, 3, 3, 7, 4, 5, 6, 8, 8, 8] + ], dtype=torch.int64) + + torch.onnx.export( + pl_model.model.encoder.embedding, + (node_labels,), + sys.argv[2], # path to model file + opset_version=18, + input_names=["node_labels"], + output_names=["out"], + dynamic_axes={ + "node_labels": {0: "nodes_number"} + } + ) + + torch.onnx.export( + pl_model.model.encoder.conv, + (node_features, edges), + sys.argv[3], # path to model file + opset_version=18, + input_names=["node_features", "edges"], + output_names=["out"], + dynamic_axes={ + "node_features": {0: "nodes_number"}, + "edges": {1: "edges_number"} + } + ) + + torch.onnx.export( + pl_model.model.decoder, + (torch.rand((1, EMBEDDING_DIM)),), + sys.argv[4], # path to model file + opset_version=18, + input_names=["expr_features"], + output_names=["out"], + dynamic_axes={ + "expr_features": {0: "batch_size"} + } + ) + + print("success!") diff --git a/ksmt-neurosmt/src/main/python/prepare-train-val-test.py b/ksmt-neurosmt/src/main/python/prepare-train-val-test.py new file mode 100755 index 000000000..acede3f30 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/prepare-train-val-test.py @@ -0,0 +1,226 @@ +#!/usr/bin/python3 + +import os +from argparse import ArgumentParser 
+ +import numpy as np +from tqdm import trange + +from pytorch_lightning import seed_everything + +from GlobalConstants import METADATA_PATH +from utils import train_val_test_indices, align_sat_unsat_sizes, select_paths_with_suitable_samples_and_transform_to_paths_from_root + + +def classic_random_split(path_to_dataset_root, val_qty, test_qty, align_train_mode, align_val_mode, align_test_mode): + sat_paths, unsat_paths = [], [] + for root, dirs, files in os.walk(path_to_dataset_root, topdown=True): + if METADATA_PATH in dirs: + dirs.remove(METADATA_PATH) + + for file_name in files: + cur_path = os.path.join(root, file_name) + + if cur_path.endswith("-sat"): + sat_paths.append(cur_path) + elif cur_path.endswith("-unsat"): + unsat_paths.append(cur_path) + else: + raise Exception(f"strange file path '{cur_path}'") + + sat_paths = select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root, sat_paths) + unsat_paths = select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root, unsat_paths) + + def split_data_to_train_val_test(data): + train_ind, val_ind, test_ind = train_val_test_indices(len(data), val_qty=val_qty, test_qty=test_qty) + + return [data[i] for i in train_ind], [data[i] for i in val_ind], [data[i] for i in test_ind] + + sat_train, sat_val, sat_test = split_data_to_train_val_test(sat_paths) + unsat_train, unsat_val, unsat_test = split_data_to_train_val_test(unsat_paths) + + sat_train, unsat_train = align_sat_unsat_sizes(sat_train, unsat_train, align_train_mode) + sat_val, unsat_val = align_sat_unsat_sizes(sat_val, unsat_val, align_val_mode) + sat_test, unsat_test = align_sat_unsat_sizes(sat_test, unsat_test, align_test_mode) + + train_data = sat_train + unsat_train + val_data = sat_val + unsat_val + test_data = sat_test + unsat_test + + np.random.shuffle(train_data) + np.random.shuffle(val_data) + np.random.shuffle(test_data) + + return train_data, val_data, test_data + + +def grouped_random_split(path_to_dataset_root, val_qty, test_qty, align_train_mode, align_val_mode, align_test_mode): + def return_group_weight(path_to_group): + return len(os.listdir(path_to_group)) + + def calc_group_weights(path_to_dataset_root): + groups = os.listdir(path_to_dataset_root) + if METADATA_PATH in groups: + groups.remove(METADATA_PATH) + + weights = [return_group_weight(os.path.join(path_to_dataset_root, group)) for group in groups] + + return list(zip(groups, weights)) + + groups = calc_group_weights(path_to_dataset_root) + + def pick_best_split(groups): + attempts = 100_000 + + groups_cnt = len(groups) + samples_cnt = sum(g[1] for g in groups) + + need_val = int(samples_cnt * val_qty) + need_test = int(samples_cnt * test_qty) + need_train = samples_cnt - need_val - need_test + + print("picking best split with existing groups") + print(f"need: {need_train} (train) | {need_val} (val) | {need_test} (test)") + print(flush=True) + + best = None + + for _ in trange(attempts): + cur_split = np.random.randint(3, size=groups_cnt) + + train_size = sum(groups[i][1] for i in range(groups_cnt) if cur_split[i] == 0) + val_size = sum(groups[i][1] for i in range(groups_cnt) if cur_split[i] == 1) + test_size = sum(groups[i][1] for i in range(groups_cnt) if cur_split[i] == 2) + + cur_error = (train_size - need_train) ** 2 + (val_size - need_val) ** 2 + (test_size - need_test) ** 2 + + if best is None or best[0] > cur_error: + best = (cur_error, cur_split) + + return best[1] + + split = pick_best_split(groups) + + train_data, val_data, test_data = [], [], 
[] + + for i in range(len(groups)): + cur_group = os.listdir(os.path.join(path_to_dataset_root, groups[i][0])) + cur_group = list(map(lambda sample: os.path.join(path_to_dataset_root, groups[i][0], sample), cur_group)) + + if split[i] == 0: + train_data += cur_group + elif split[i] == 1: + val_data += cur_group + elif split[i] == 2: + test_data += cur_group + + train_data = select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root, train_data) + val_data = select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root, val_data) + test_data = select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root, test_data) + + def split_data_to_sat_unsat(data): + sat_data = list(filter(lambda path: path.endswith("-sat"), data)) + unsat_data = list(filter(lambda path: path.endswith("-unsat"), data)) + + return sat_data, unsat_data + + def align_data(data, mode): + sat_data, unsat_data = split_data_to_sat_unsat(data) + sat_data, unsat_data = align_sat_unsat_sizes(sat_data, unsat_data, mode) + + return sat_data + unsat_data + + if align_train_mode != "none": + train_data = align_data(train_data, align_train_mode) + + if align_val_mode != "none": + val_data = align_data(val_data, align_val_mode) + + if align_test_mode != "none": + test_data = align_data(test_data, align_test_mode) + + np.random.shuffle(train_data) + np.random.shuffle(val_data) + np.random.shuffle(test_data) + + return train_data, val_data, test_data + + +def create_split(path_to_dataset_root, val_qty, test_qty, align_train_mode, align_val_mode, align_test_mode, grouped): + if grouped: + train_data, val_data, test_data = grouped_random_split( + path_to_dataset_root, + val_qty, test_qty, + align_train_mode, align_val_mode, align_test_mode + ) + + else: + train_data, val_data, test_data = classic_random_split( + path_to_dataset_root, + val_qty, test_qty, + align_train_mode, align_val_mode, align_test_mode + ) + + print("\nstats:", flush=True) + print(f"train: {len(train_data)}") + print(f"val: {len(val_data)}") + print(f"test: {len(test_data)}") + print(flush=True) + + meta_path = os.path.join(path_to_dataset_root, METADATA_PATH) + os.makedirs(meta_path, exist_ok=True) + + with open(os.path.join(meta_path, "train"), "w") as f: + f.write("\n".join(train_data)) + + if len(train_data): + f.write("\n") + + with open(os.path.join(meta_path, "val"), "w") as f: + f.write("\n".join(val_data)) + + if len(val_data): + f.write("\n") + + with open(os.path.join(meta_path, "test"), "w") as f: + f.write("\n".join(test_data)) + + if len(test_data): + f.write("\n") + + +def get_args(): + parser = ArgumentParser(description="train/val/test splitting script") + + parser.add_argument("--ds", required=True) + + parser.add_argument("--val_qty", type=float, default=0.15) + parser.add_argument("--test_qty", type=float, default=0.1) + + parser.add_argument("--align_train", choices=["none", "upsample", "downsample"], default="upsample") + parser.add_argument("--align_val", choices=["none", "upsample", "downsample"], default="none") + parser.add_argument("--align_test", choices=["none", "upsample", "downsample"], default="none") + + parser.add_argument("--grouped", action="store_true") + + args = parser.parse_args() + print("args:") + for arg in vars(args): + print(arg, "=", getattr(args, arg)) + + print() + + return args + + +if __name__ == "__main__": + seed_everything(24) + + args = get_args() + + create_split( + args.ds, + args.val_qty, args.test_qty, + args.align_train, 
args.align_val, args.align_test, + args.grouped + ) diff --git a/ksmt-neurosmt/src/main/python/python.iml b/ksmt-neurosmt/src/main/python/python.iml new file mode 100644 index 000000000..f4e6189e6 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/python.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/ksmt-neurosmt/src/main/python/requirements.txt b/ksmt-neurosmt/src/main/python/requirements.txt new file mode 100644 index 000000000..8517d1de2 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/requirements.txt @@ -0,0 +1,7 @@ +tqdm +numpy +scikit-learn +torch +torch_geometric +pytorch-lightning +torchmetrics diff --git a/ksmt-neurosmt/src/main/python/train.py b/ksmt-neurosmt/src/main/python/train.py new file mode 100755 index 000000000..3b0dc8dfe --- /dev/null +++ b/ksmt-neurosmt/src/main/python/train.py @@ -0,0 +1,61 @@ +#!/usr/bin/python3 + +import os; os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"; os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["GPU"] +from argparse import ArgumentParser + +import torch + +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.callbacks import ModelCheckpoint + +from GlobalConstants import MAX_EPOCHS +from GraphDataloader import get_dataloader +from LightningModel import LightningModel + + +def get_args(): + parser = ArgumentParser(description="main training script") + parser.add_argument("--ds", required=True, nargs="+") + parser.add_argument("--oenc", required=True) + parser.add_argument("--ckpt", required=False) + + args = parser.parse_args() + print("args:") + for arg in vars(args): + print(arg, "=", getattr(args, arg)) + + print() + + return args + + +if __name__ == "__main__": + # enable usage of nvidia's TensorFloat if available + torch.set_float32_matmul_precision("medium") + + args = get_args() + + train_dl = get_dataloader(args.ds, "train", args.oenc) + val_dl = get_dataloader(args.ds, "val", args.oenc) + + pl_model = LightningModel() + trainer = Trainer( + accelerator="auto", + # precision="bf16-mixed", + logger=TensorBoardLogger("../logs", name="neuro-smt"), + callbacks=[ModelCheckpoint( + filename="epoch_{epoch:03d}_roc-auc_{val/roc-auc:.3f}", + monitor="val/roc-auc", + verbose=True, + save_last=True, save_top_k=3, mode="max", + auto_insert_metric_name=False, save_on_train_epoch_end=False + )], + max_epochs=MAX_EPOCHS, + log_every_n_steps=1, + enable_checkpointing=True, + barebones=False, + default_root_dir=".." 
+ ) + + trainer.fit(pl_model, train_dl, val_dl, ckpt_path=args.ckpt) diff --git a/ksmt-neurosmt/src/main/python/utils.py b/ksmt-neurosmt/src/main/python/utils.py new file mode 100644 index 000000000..b4b7d276e --- /dev/null +++ b/ksmt-neurosmt/src/main/python/utils.py @@ -0,0 +1,88 @@ +import os + +import numpy as np +from tqdm import tqdm + +from GlobalConstants import MAX_FORMULA_SIZE, MAX_FORMULA_DEPTH +from GraphReader import read_graph_by_path + + +def train_val_test_indices(cnt: int, val_qty: float = 0.15, test_qty: float = 0.1)\ + -> tuple[np.ndarray, np.ndarray, np.ndarray]: + + perm = np.arange(cnt) + np.random.shuffle(perm) + + val_cnt = int(cnt * val_qty) + test_cnt = int(cnt * test_qty) + + return perm[val_cnt + test_cnt:], perm[:val_cnt], perm[val_cnt:val_cnt + test_cnt] + + +# select paths to suitable samples and transform them to paths from dataset root +def select_paths_with_suitable_samples_and_transform_to_paths_from_root(path_to_dataset_root: str, paths: list[str])\ + -> list[str]: + + print("\nloading paths", flush=True) + + correct_paths = [] + for path in tqdm(paths): + operators, edges, _ = read_graph_by_path(path, max_size=MAX_FORMULA_SIZE, max_depth=MAX_FORMULA_DEPTH) + + if operators is None: + continue + + if len(edges) == 0: + print(f"w: ignoring formula without edges; file '{path}'") + continue + + correct_paths.append(os.path.relpath(path, path_to_dataset_root)) + + return correct_paths + + +def align_sat_unsat_sizes_with_upsamping(sat_data: list[str], unsat_data: list[str]) -> tuple[list[str], list[str]]: + sat_cnt = len(sat_data) + unsat_cnt = len(unsat_data) + + sat_indices = list(range(sat_cnt)) + unsat_indices = list(range(unsat_cnt)) + + if sat_cnt < unsat_cnt: + sat_indices += list(np.random.choice(np.array(sat_indices), unsat_cnt - sat_cnt, replace=True)) + elif sat_cnt > unsat_cnt: + unsat_indices += list(np.random.choice(np.array(unsat_indices), sat_cnt - unsat_cnt, replace=True)) + + return ( + list(np.array(sat_data, dtype=object)[sat_indices]), + list(np.array(unsat_data, dtype=object)[unsat_indices]) + ) + + +def align_sat_unsat_sizes_with_downsamping(sat_data: list[str], unsat_data: list[str]) -> tuple[list[str], list[str]]: + sat_cnt = len(sat_data) + unsat_cnt = len(unsat_data) + + sat_indices = list(range(sat_cnt)) + unsat_indices = list(range(unsat_cnt)) + + if sat_cnt > unsat_cnt: + sat_indices = np.random.choice(np.array(sat_indices), unsat_cnt, replace=False) + elif sat_cnt < unsat_cnt: + unsat_indices = np.random.choice(np.array(unsat_indices), sat_cnt, replace=False) + + return ( + list(np.array(sat_data, dtype=object)[sat_indices]), + list(np.array(unsat_data, dtype=object)[unsat_indices]) + ) + + +def align_sat_unsat_sizes(sat_data: list[str], unsat_data: list[str], mode: str) -> tuple[list[str], list[str]]: + if mode == "none": + return sat_data, unsat_data + elif mode == "upsample": + return align_sat_unsat_sizes_with_upsamping(sat_data, unsat_data) + elif mode == "downsample": + return align_sat_unsat_sizes_with_downsamping(sat_data, unsat_data) + else: + raise Exception(f"unknown sampling mode {mode}") diff --git a/ksmt-neurosmt/src/main/python/validate.py b/ksmt-neurosmt/src/main/python/validate.py new file mode 100755 index 000000000..0d8588897 --- /dev/null +++ b/ksmt-neurosmt/src/main/python/validate.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 + +import os; os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"; os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["GPU"] +from argparse import ArgumentParser + +import torch + +from 
pytorch_lightning import Trainer + +from GraphDataloader import get_dataloader +from LightningModel import LightningModel + + +def get_args(): + parser = ArgumentParser(description="validation script") + parser.add_argument("--ds", required=True, nargs="+") + parser.add_argument("--oenc", required=True) + parser.add_argument("--ckpt", required=True) + + args = parser.parse_args() + print("args:") + for arg in vars(args): + print(arg, "=", getattr(args, arg)) + + print() + + return args + + +if __name__ == "__main__": + # enable usage of nvidia's TensorFloat if available + torch.set_float32_matmul_precision("medium") + + args = get_args() + + val_dl = get_dataloader(args.ds, "val", args.oenc) + test_dl = get_dataloader(args.ds, "test", args.oenc) + + trainer = Trainer() + + trainer.validate(LightningModel(), val_dl, args.ckpt) + trainer.test(LightningModel(), test_dl, args.ckpt) diff --git a/ksmt-neurosmt/src/main/resources/conv.onnx b/ksmt-neurosmt/src/main/resources/conv.onnx new file mode 100644 index 000000000..e3bbbc358 Binary files /dev/null and b/ksmt-neurosmt/src/main/resources/conv.onnx differ diff --git a/ksmt-neurosmt/src/main/resources/decoder.onnx b/ksmt-neurosmt/src/main/resources/decoder.onnx new file mode 100644 index 000000000..92dba86f2 Binary files /dev/null and b/ksmt-neurosmt/src/main/resources/decoder.onnx differ diff --git a/ksmt-neurosmt/src/main/resources/embeddings.onnx b/ksmt-neurosmt/src/main/resources/embeddings.onnx new file mode 100644 index 000000000..ef8982499 Binary files /dev/null and b/ksmt-neurosmt/src/main/resources/embeddings.onnx differ diff --git a/ksmt-neurosmt/src/main/resources/encoder-ordinals b/ksmt-neurosmt/src/main/resources/encoder-ordinals new file mode 100644 index 000000000..c051615a6 --- /dev/null +++ b/ksmt-neurosmt/src/main/resources/encoder-ordinals @@ -0,0 +1,39 @@ += +SYMBOLIC;Address +SYMBOLIC;Array +SYMBOLIC;BitVec +SYMBOLIC;Bool +SYMBOLIC;FP +VALUE;Address +VALUE;BitVec +VALUE;Bool +VALUE;FP +VALUE;FP_RM +and +bvadd +bvand +bvashr +bvlshr +bvmul +bvneg +bvnot +bvor +bvsdiv +bvshl +bvsle +bvsrem +extract +fp.add +fp.eq +fp.isNaN +fp.leq +fp.lt +fp.mul +fp.to_fp +ite +not +or +select +sign_extend +store +zero_extend diff --git a/ksmt-neurosmt/utils/build.gradle.kts b/ksmt-neurosmt/utils/build.gradle.kts new file mode 100644 index 000000000..9023512ad --- /dev/null +++ b/ksmt-neurosmt/utils/build.gradle.kts @@ -0,0 +1,61 @@ +plugins { + kotlin("jvm") + application +} + +group = "org.example" +version = "unspecified" + +repositories { + mavenCentral() +} + +dependencies { + implementation(project(":ksmt-core")) + implementation(project(":ksmt-z3")) + implementation(project(":ksmt-runner")) + + implementation("me.tongfei:progressbar:0.9.4") +} + +tasks { + val smt2FatJar = register("smt2FatJar") { + dependsOn.addAll(listOf("compileJava", "compileKotlin", "processResources")) + + archiveFileName.set("convert-smt2.jar") + destinationDirectory.set(File(".")) + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + + manifest { + attributes(mapOf("Main-Class" to "io.ksmt.solver.neurosmt.smt2Converter.SMT2ConverterKt")) + } + + val sourcesMain = sourceSets.main.get() + val contents = configurations.runtimeClasspath.get() + .map { if (it.isDirectory) it else zipTree(it) } + sourcesMain.output + + from(contents) + } + + val ksmtFatJar = register("ksmtFatJar") { + dependsOn.addAll(listOf("compileJava", "compileKotlin", "processResources")) + + archiveFileName.set("convert-ksmt.jar") + destinationDirectory.set(File(".")) + duplicatesStrategy = 
DuplicatesStrategy.EXCLUDE + + manifest { + attributes(mapOf("Main-Class" to "io.ksmt.solver.neurosmt.ksmtBinaryConverter.KSMTBinaryConverterKt")) + } + + val sourcesMain = sourceSets.main.get() + val contents = configurations.runtimeClasspath.get() + .map { if (it.isDirectory) it else zipTree(it) } + sourcesMain.output + + from(contents) + } + + build { + dependsOn(smt2FatJar, ksmtFatJar) + } +} \ No newline at end of file diff --git a/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/FormulaGraphExtractor.kt b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/FormulaGraphExtractor.kt new file mode 100644 index 000000000..97ff03201 --- /dev/null +++ b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/FormulaGraphExtractor.kt @@ -0,0 +1,81 @@ +package io.ksmt.solver.neurosmt + +import io.ksmt.KContext +import io.ksmt.expr.KApp +import io.ksmt.expr.KConst +import io.ksmt.expr.KExpr +import io.ksmt.expr.KInterpretedValue +import io.ksmt.expr.transformer.KNonRecursiveTransformer +import io.ksmt.sort.* +import java.io.OutputStream +import java.util.* + +// serializer of ksmt formula to simple format of formula's structure graph +class FormulaGraphExtractor( + override val ctx: KContext, + val formula: KExpr, + outputStream: OutputStream +) : KNonRecursiveTransformer(ctx) { + + private val exprToVertexID = IdentityHashMap, Long>() + private var currentID = 0L + + private val writer = outputStream.bufferedWriter() + + override fun transformApp(expr: KApp): KExpr { + exprToVertexID[expr] = currentID++ + + when (expr) { + is KInterpretedValue<*> -> writeValue(expr) + is KConst<*> -> writeSymbolicVariable(expr) + else -> writeApp(expr) + } + + writer.newLine() + + return expr + } + + private fun writeSymbolicVariable(symbol: KConst) { + writer.write("SYMBOLIC;") + + val sort = symbol.decl.sort + when (sort) { + is KBoolSort -> writer.write("Bool") + is KBvSort -> writer.write("BitVec") + is KFpSort -> writer.write("FP") + is KFpRoundingModeSort -> writer.write("FP_RM") + is KArraySortBase<*> -> writer.write("Array") + is KUninterpretedSort -> writer.write(sort.name) + else -> error("unknown symbolic sort: ${sort::class.simpleName}") + } + } + + private fun writeValue(value: KInterpretedValue) { + writer.write("VALUE;") + + val sort = value.decl.sort + when (sort) { + is KBoolSort -> writer.write("Bool") + is KBvSort -> writer.write("BitVec") + is KFpSort -> writer.write("FP") + is KFpRoundingModeSort -> writer.write("FP_RM") + is KArraySortBase<*> -> writer.write("Array") + is KUninterpretedSort -> writer.write(sort.name) + else -> error("unknown value sort: ${sort::class.simpleName}") + } + } + + private fun writeApp(expr: KApp) { + writer.write("${expr.decl.name};") + + for (child in expr.args) { + writer.write(" ${exprToVertexID[child]}") + } + } + + fun extractGraph() { + apply(formula) + writer.close() + } +} \ No newline at end of file diff --git a/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt new file mode 100644 index 000000000..f5eee9357 --- /dev/null +++ b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt @@ -0,0 +1,79 @@ +package io.ksmt.solver.neurosmt + +import com.jetbrains.rd.framework.SerializationCtx +import com.jetbrains.rd.framework.Serializers +import com.jetbrains.rd.framework.UnsafeBuffer +import io.ksmt.KContext +import io.ksmt.expr.KExpr +import io.ksmt.runner.serializer.AstSerializationCtx +import io.ksmt.solver.KSolverStatus +import 
diff --git a/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt
new file mode 100644
index 000000000..f5eee9357
--- /dev/null
+++ b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/Utils.kt
@@ -0,0 +1,79 @@
+package io.ksmt.solver.neurosmt
+
+import com.jetbrains.rd.framework.SerializationCtx
+import com.jetbrains.rd.framework.Serializers
+import com.jetbrains.rd.framework.UnsafeBuffer
+import io.ksmt.KContext
+import io.ksmt.expr.KExpr
+import io.ksmt.runner.serializer.AstSerializationCtx
+import io.ksmt.solver.KSolverStatus
+import io.ksmt.solver.z3.KZ3Solver
+import io.ksmt.sort.KBoolSort
+import io.ksmt.utils.uncheckedCast
+import java.io.File
+import java.io.InputStream
+import java.io.OutputStream
+import java.nio.file.Path
+import kotlin.time.Duration
+
+// read an .smt2 file and try to extract the expected answer from its "(set-info :status ...)" line
+fun getAnswerForTest(path: Path): KSolverStatus {
+    File(path.toUri()).useLines { lines ->
+        for (line in lines) {
+            when (line) {
+                "(set-info :status sat)" -> return KSolverStatus.SAT
+                "(set-info :status unsat)" -> return KSolverStatus.UNSAT
+                "(set-info :status unknown)" -> return KSolverStatus.UNKNOWN
+            }
+        }
+    }
+
+    return KSolverStatus.UNKNOWN
+}
+
+// get the answer for a formula by solving it with the Z3 solver under the given timeout
+fun getAnswerForTest(ctx: KContext, formula: List<KExpr<KBoolSort>>, timeout: Duration): KSolverStatus {
+    return KZ3Solver(ctx).use { solver ->
+        for (clause in formula) {
+            solver.assert(clause)
+        }
+
+        solver.check(timeout = timeout)
+    }
+}
+
+// serialize a list of ksmt expressions to an output stream in the ksmt binary format (uses a fixed 100_000-byte buffer)
+fun serialize(ctx: KContext, expressions: List<KExpr<KBoolSort>>, outputStream: OutputStream) {
+    val serializationCtx = AstSerializationCtx().apply { initCtx(ctx) }
+    val marshaller = AstSerializationCtx.marshaller(serializationCtx)
+    val emptyRdSerializationCtx = SerializationCtx(Serializers())
+
+    val buffer = UnsafeBuffer(ByteArray(100_000))
+
+    expressions.forEach { expr ->
+        marshaller.write(emptyRdSerializationCtx, buffer, expr)
+    }
+
+    outputStream.write(buffer.getArray())
+    outputStream.flush()
+}
+
+// deserialize a list of ksmt expressions from an input stream (reads until the underlying buffer is exhausted)
+fun deserialize(ctx: KContext, inputStream: InputStream): List<KExpr<KBoolSort>> {
+    val srcSerializationCtx = AstSerializationCtx().apply { initCtx(ctx) }
+    val srcMarshaller = AstSerializationCtx.marshaller(srcSerializationCtx)
+    val emptyRdSerializationCtx = SerializationCtx(Serializers())
+
+    val buffer = UnsafeBuffer(inputStream.readBytes())
+    val expressions = mutableListOf<KExpr<KBoolSort>>()
+
+    while (true) {
+        try {
+            expressions.add(srcMarshaller.read(emptyRdSerializationCtx, buffer).uncheckedCast())
+        } catch (e: IllegalStateException) {
+            break
+        }
+    }
+
+    return expressions
+}
\ No newline at end of file
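Note (usage sketch, not part of the patch): a minimal round trip through the serialize/deserialize helpers above; the toy formula and the in-memory streams are assumptions made for the example, the converters themselves only work with file streams.

import io.ksmt.KContext
import io.ksmt.solver.neurosmt.deserialize
import io.ksmt.solver.neurosmt.serialize
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream

fun main() {
    val ctx = KContext()
    val asserts = with(ctx) {
        listOf(mkAnd(listOf(mkConst("p", mkBoolSort()), mkConst("q", mkBoolSort()))))
    }

    // write the asserts in the ksmt binary format, then read them back
    val out = ByteArrayOutputStream()
    serialize(ctx, asserts, out)
    val restored = deserialize(ctx, ByteArrayInputStream(out.toByteArray()))

    // the unused tail of the fixed-size buffer is what terminates the read loop
    println("restored ${restored.size} expression(s)")
}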
diff --git a/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/ksmtBinaryConverter/KSMTBinaryConverter.kt b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/ksmtBinaryConverter/KSMTBinaryConverter.kt
new file mode 100644
index 000000000..a87cb886e
--- /dev/null
+++ b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/ksmtBinaryConverter/KSMTBinaryConverter.kt
@@ -0,0 +1,94 @@
+package io.ksmt.solver.neurosmt.ksmtBinaryConverter
+
+import io.ksmt.KContext
+import io.ksmt.solver.KSolverStatus
+import io.ksmt.solver.neurosmt.FormulaGraphExtractor
+import io.ksmt.solver.neurosmt.deserialize
+import io.ksmt.solver.neurosmt.getAnswerForTest
+import me.tongfei.progressbar.ProgressBar
+import java.io.File
+import java.io.FileInputStream
+import java.io.FileOutputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.io.path.isRegularFile
+import kotlin.io.path.name
+import kotlin.time.Duration.Companion.seconds
+
+// tool that converts formulas from the ksmt binary format into their structure graphs
+fun main(args: Array<String>) {
+    val inputRoot = args[0]
+    val outputRoot = args[1]
+    val timeout = args[2].toInt().seconds
+
+    val files = Files.walk(Path.of(inputRoot)).filter { it.isRegularFile() }
+
+    File(outputRoot).mkdirs()
+
+    var sat = 0; var unsat = 0; var skipped = 0
+
+    val ctx = KContext(simplificationMode = KContext.SimplificationMode.NO_SIMPLIFY)
+
+    var curIdx = 0
+    ProgressBar.wrap(files, "converting ksmt binary files").forEach {
+        val relFile = it.toFile().relativeTo(File(inputRoot))
+        val parentDirFile = if (relFile.parentFile == null) {
+            "."
+        } else {
+            relFile.parentFile.path
+        }
+        val outputDir = File(outputRoot, parentDirFile)
+        outputDir.mkdirs()
+
+        val assertList = try {
+            deserialize(ctx, FileInputStream(it.toFile()))
+        } catch (e: Exception) {
+            skipped++
+            return@forEach
+        }
+
+        val answer = when {
+            it.name.endsWith("-sat") -> KSolverStatus.SAT
+            it.name.endsWith("-unsat") -> KSolverStatus.UNSAT
+            else -> getAnswerForTest(ctx, assertList, timeout)
+        }
+
+        if (answer == KSolverStatus.UNKNOWN) {
+            skipped++
+            return@forEach
+        }
+
+        with(ctx) {
+            val formula = when (assertList.size) {
+                0 -> {
+                    skipped++
+                    return@forEach
+                }
+                1 -> {
+                    assertList[0]
+                }
+                else -> {
+                    mkAnd(assertList)
+                }
+            }
+
+            val outputFile = File("$outputDir/$curIdx-${answer.toString().lowercase()}")
+            val outputStream = FileOutputStream(outputFile)
+            outputStream.write("; $it\n".encodeToByteArray())
+
+            val extractor = FormulaGraphExtractor(ctx, formula, outputStream)
+            extractor.extractGraph()
+        }
+
+        when (answer) {
+            KSolverStatus.SAT -> sat++
+            KSolverStatus.UNSAT -> unsat++
+            else -> { /* unreachable: UNKNOWN was filtered out above */ }
+        }
+
+        curIdx++
+    }
+
+    println()
+    println("sat: $sat; unsat: $unsat; skipped: $skipped")
+}
\ No newline at end of file
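Note (usage sketch, not part of the patch): the converter above can also be called programmatically; the directory names and the 10-second timeout below are placeholders.

import io.ksmt.solver.neurosmt.ksmtBinaryConverter.main

fun convertKsmtBinaryDataset() {
    // args: <input root with ksmt binary files> <output root> <Z3 timeout in seconds>
    // files already named "*-sat" / "*-unsat" keep that label, everything else is solved with Z3,
    // and formulas whose status remains UNKNOWN are skipped
    main(arrayOf("data/ksmt-binary", "data/graphs", "10"))
}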
diff --git a/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/smt2Converter/SMT2Converter.kt b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/smt2Converter/SMT2Converter.kt
new file mode 100644
index 000000000..e14b8e0a9
--- /dev/null
+++ b/ksmt-neurosmt/utils/src/main/kotlin/io/ksmt/solver/neurosmt/smt2Converter/SMT2Converter.kt
@@ -0,0 +1,85 @@
+package io.ksmt.solver.neurosmt.smt2Converter
+
+import io.ksmt.KContext
+import io.ksmt.parser.KSMTLibParseException
+import io.ksmt.solver.KSolverStatus
+import io.ksmt.solver.neurosmt.FormulaGraphExtractor
+import io.ksmt.solver.neurosmt.getAnswerForTest
+import io.ksmt.solver.z3.KZ3SMTLibParser
+import me.tongfei.progressbar.ProgressBar
+import java.io.File
+import java.io.FileOutputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.io.path.isRegularFile
+import kotlin.io.path.name
+
+// tool that converts formulas from the .smt2 format into their structure graphs
+fun main(args: Array<String>) {
+    val inputRoot = args[0]
+    val outputRoot = args[1]
+
+    val files = Files.walk(Path.of(inputRoot)).filter { it.isRegularFile() }
+
+    var ok = 0; var fail = 0
+    var sat = 0; var unsat = 0; var skipped = 0
+
+    val ctx = KContext(simplificationMode = KContext.SimplificationMode.NO_SIMPLIFY)
+
+    var curIdx = 0
+    ProgressBar.wrap(files, "converting smt2 files").forEach {
+        if (!it.name.endsWith(".smt2")) {
+            return@forEach
+        }
+
+        val answer = getAnswerForTest(it)
+
+        if (answer == KSolverStatus.UNKNOWN) {
+            skipped++
+            return@forEach
+        }
+
+        with(ctx) {
+            val formula = try {
+                val assertList = KZ3SMTLibParser(ctx).parse(it)
+                when (assertList.size) {
+                    0 -> {
+                        skipped++
+                        return@forEach
+                    }
+                    1 -> {
+                        ok++
+                        assertList[0]
+                    }
+                    else -> {
+                        ok++
+                        mkAnd(assertList)
+                    }
+                }
+            } catch (e: KSMTLibParseException) {
+                fail++
+                e.printStackTrace()
+                return@forEach
+            }
+
+            val outputFile = File("$outputRoot/$curIdx-${answer.toString().lowercase()}")
+            val outputStream = FileOutputStream(outputFile)
+            outputStream.write("; $it\n".encodeToByteArray())
+
+            val extractor = FormulaGraphExtractor(ctx, formula, outputStream)
+            extractor.extractGraph()
+        }
+
+        when (answer) {
+            KSolverStatus.SAT -> sat++
+            KSolverStatus.UNSAT -> unsat++
+            else -> { /* unreachable: UNKNOWN was filtered out above */ }
+        }
+
+        curIdx++
+    }
+
+    println()
+    println("processed: $ok; failed: $fail")
+    println("sat: $sat; unsat: $unsat; skipped: $skipped")
+}
\ No newline at end of file
diff --git a/settings.gradle.kts b/settings.gradle.kts
index c6d4d64de..1a9014436 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -17,3 +17,6 @@ pluginManagement {
         }
     }
 }
+include("ksmt-neurosmt")
+include("ksmt-neurosmt:utils")
+// findProject(":ksmt-neurosmt:utils")?.name = "utils"
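Note (usage sketch, not part of the patch): the analogous programmatic call for the smt2 converter above; the paths are placeholders. Unlike the ksmt binary converter it never runs a solver: the expected status is taken from each benchmark's "(set-info :status ...)" line, and files without a known status are skipped.

import io.ksmt.solver.neurosmt.smt2Converter.main

fun convertSmt2Dataset() {
    // args: <input root with .smt2 files> <output root>
    // output files are named "<index>-<sat|unsat>" and start with a "; <source path>" comment line
    main(arrayOf("benchmarks/smt2", "data/graphs-smt2"))
}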