diff --git a/build.sbt b/build.sbt
index 627a868..9032fa1 100644
--- a/build.sbt
+++ b/build.sbt
@@ -21,7 +21,7 @@ libraryDependencies ++= Seq(
   "com.typesafe.akka" %% "akka-http-caching" % AkkaHttpVersion,
   "com.typesafe.akka" %% "akka-http-spray-json" % AkkaHttpVersion,
   "ch.qos.logback" % "logback-classic" % LogbackVersion,
-  "org.tensorflow" % "tensorflow-core-platform-gpu" % "0.4.1"
+  "org.tensorflow" % "tensorflow-core-platform" % "1.1.0"
 )
 
 assembly / mainClass := Some("serving.http.HttpServer")
diff --git a/src/main/scala/serving/config/ConfigManager.scala b/src/main/scala/serving/config/ConfigManager.scala
index e05ebb8..3179c9e 100644
--- a/src/main/scala/serving/config/ConfigManager.scala
+++ b/src/main/scala/serving/config/ConfigManager.scala
@@ -7,8 +7,8 @@ object ConfigManager {
   val takeSpinCountDelay : Int = System.getProperty("takeSpinCountDelay","5").toInt
 
   val topK: Int = System.getProperty("topK","10").toInt
-  val dim: Int = System.getProperty("dim","100").toInt
-  val sample: Int = System.getProperty("sample","10000").toInt
+  val dim: Int = System.getProperty("dim","1000").toInt
+  val sample: Int = System.getProperty("sample","10000000").toInt
   val batch: Int = System.getProperty("batch","16").toInt
 
   val npyFile: String = System.getProperty("npyFile", "./model/10000-100.npy")
diff --git a/src/main/scala/serving/model/CosineSimilarity.scala b/src/main/scala/serving/model/CosineSimilarity.scala
index 5e9a172..df1ece7 100644
--- a/src/main/scala/serving/model/CosineSimilarity.scala
+++ b/src/main/scala/serving/model/CosineSimilarity.scala
@@ -7,12 +7,13 @@ import org.tensorflow.ndarray.Shape
 import org.tensorflow.ndarray.buffer.DataBuffers
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.{Constant, Placeholder}
+import org.tensorflow.op.nn.TopK
 import org.tensorflow.types.TFloat32
 import serving.config.ConfigManager
 import serving.tensor.TensorFlowProvider
 import serving.tensor.InputTensor
-
+import java.nio.{ByteBuffer, ByteOrder}
 import java.nio.file.Paths
 
 final case class Vec(vec: Seq[(String, Array[Float])])
 
@@ -37,21 +38,47 @@ object CosineSimilarity {
 
   def model(k: Int): Graph = {
-    val wArray: Array[Float] = dataVectorNumpyArray
+    //val wArray: Array[Float] = dataVectorNumpyArray
+    //val wArray: Array[Float] = Array.fill(dim * wLength)(0.0f)
+
+    val wArrayLength:Long = dim.toLong * wLength.toLong
 
-    assert(wArray.length == (dim * wLength), f"${wArray.length} != ${dim} * ${wLength} npy file does not match size and dimension and sample length. ")
+    //assert(wArray.length == (dim * wLength), f"${wArray.length} != ${dim} * ${wLength} npy file does not match size and dimension and sample length. ")
 
     val graph = new Graph()
     val tf = Ops.create(graph)
 
-    val wTensor: Constant[TFloat32] = {
-      val fp32Buf = DataBuffers.ofFloats(wLength * dim).write(wArray, 0, wLength * dim)
-      tf.constant(Shape.of(1, dim, wLength), fp32Buf)
+
+    val wTensor = TFloat32.tensorOf(Shape.of(1, dim, wLength))
+
+
+    val chunkSize:Long = 100000
+    var offset:Long = 0
+
+    while (offset < wArrayLength) {
+      val length = Math.min(chunkSize, wArrayLength - offset)
+      println(length)
+      // Copy the data chunk by chunk into a byte buffer
+
+      val byteBuffer = ByteBuffer.allocateDirect((length * 4).toInt).order(ByteOrder.nativeOrder())
+      val byteArray = new Array[Byte]((length * 4).toInt)
+      byteBuffer.put(byteArray)
+
+      // Write the chunk into wTensor
+      wTensor.asRawTensor().data().write(byteArray)
+
+      offset += length
+
+      val totalCopied = (100000 * (offset / 100000)) + length
+      println(s"Total copied elements: $totalCopied, wArray length: ${wArrayLength}")
     }
+
+
+
     val vTensor = tf.withName("input").placeholder(classOf[TFloat32], Placeholder.shape(Shape.of(-1, dim, 1)))
 
-    val mul = tf.math.mul(vTensor, wTensor)
+    val mul = tf.math.mul(vTensor, tf.constant(wTensor))
     val cosineSimilarity = tf.reduceSum(mul, tf.array(1))
-    val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k))
+    val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k), Array.empty[TopK.Options])
 
     graph
   }
@@ -103,4 +130,8 @@ object CosineSimilarity {
     run(v, k = topK)
   }
 
+  def main(args: Array[String]): Unit = {
+    val a = run(Array(Array.fill(100)(0.0f)), k = 5)
+    a.foreach(x => x.foreach(println))
+  }
 }
diff --git a/src/main/scala/serving/tensor/TensorFlowProvider.scala b/src/main/scala/serving/tensor/TensorFlowProvider.scala
index 09c44a5..2ba68b2 100644
--- a/src/main/scala/serving/tensor/TensorFlowProvider.scala
+++ b/src/main/scala/serving/tensor/TensorFlowProvider.scala
@@ -27,7 +27,7 @@ class TensorFlowProvider(graph: Graph) extends AutoCloseable {
 
     // run. This takes most of the time
     val resultTensors: Map[(String, Int), Tensor] = (runner.run().asScala zip outputs)
-      .map(x => (x._2._1, x._2._2) -> x._1).toMap
+      .map(x => (x._2._1, x._2._2) -> x._1).map(x => (x._1, x._2.getValue)).toMap
 
     //release
     inputTensors.foreach(_.tensor.close())