Skip to content

Commit

Permalink
[gosrc2cpg] - Made structural changes while creating initial AST node…
Browse files Browse the repository at this point in the history
…s per file joernio#3695 (joernio#3734)

Earlier, we were creating the AST structure per file in the following way.

NameSpaceNode(`package prefixed with filename`)=>TypeDecl(`Fake TypeDecl
node representing the given file`)=>Method(`Fake Method node
representing the statements (imports, global var and constants, types and
functions) in the given file`)=>Block(`all the direct statement ASTs as
child nodes`)

With the new structure, we want to create a Fake TypeDecl node representing
one package (one package can have multiple Go files in it). In order to
achieve this, nodes are now created in the following order: first for the
package and then for each file.

For package, i.e. per folder

NameSpaceNode(`one namespace node representing one
package`)=>TypeDecl(`Fake TypeDecl node representing the given package.
File path mentioned in this case will be the folder path`)

For each file in the package, the first fake node will be added as a child
node of the package TypeDecl created in the previous cache pass.

Method(`Fake Method node representing the statements (imports, global var
and constants, types and functions) in the given file`)=>Block(`all the
direct statement asts as child nodes`)

- Added a few more unit tests covering the new use cases.
- Changed the existing unit tests to match the updated AST structure.
- A few memory optimisations that skip some parts of the processing while
processing third party dependencies.

TODO:

- Handle global variables and constants so that they are created as member
fields of the package-level Fake TypeDecl we are creating. As of now they
are being created as `LOCAL` nodes under the file-level Fake `METHOD` node
we have created.
- If a global variable or constant is accessed in the same package, as of
now it is being treated as an `IDENTIFIER`; this needs to be changed so
that it is converted to a FieldAccess `CALL` node.
- With the above change, we might not require the file-level fake
`METHOD` node, because global variables and constants are going to be
treated as member fields of the package-level TypeDecl instead of `LOCAL`
nodes. So we might get rid of this file-level Fake `METHOD` node.
  • Loading branch information
pandurangpatil authored Oct 12, 2023
1 parent 9644ce8 commit 5f59dab
Show file tree
Hide file tree
Showing 13 changed files with 155 additions and 161 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ class GoSrc2Cpg extends X2CpgFrontend[Config] {
astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
if config.fetchDependencies then new DownloadDependenciesPass(goMod).process()
val astCreators = new MethodAndTypeCacheBuilderPass(astGenResult.parsedFiles, config, goMod).process()
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod).process()
new AstCreationPass(cpg, astCreators, config, report).createAndApply()
// TypeNodePass.withRegisteredTypes(GoGlobal.typesSeen(), cpg).createAndApply()
report.print()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.model.{GoMod, GoModHelper}
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.parser.GoAstJsonParser.ParserResult
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.datastructures.Scope
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder}
import io.shiftleft.codepropertygraph.generated.NodeTypes
import io.shiftleft.codepropertygraph.generated.nodes.{NewNamespaceBlock, NewNode}
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import io.shiftleft.codepropertygraph.generated.nodes.NewNode
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate.DiffGraphBuilder
import ujson.Value
Expand Down Expand Up @@ -47,45 +46,27 @@ class AstCreator(val relPathFileName: String, val parserResult: ParserResult, go
}

private def astForTranslationUnit(rootNode: ParserNodeInfo): Ast = {
val namespaceBlock = NewNamespaceBlock()
.name(fullyQualifiedPackage)
.fullName(s"$relPathFileName:$fullyQualifiedPackage")
.filename(relPathFileName)
methodAstParentStack.push(namespaceBlock)
val rootAst = Ast(namespaceBlock).withChild(
astInFakeMethod(
fullyQualifiedPackage + "." + NamespaceTraversal.globalNamespaceName,
namespaceBlock.fullName + "." + NamespaceTraversal.globalNamespaceName,
val name = s"$fullyQualifiedPackage.${parserResult.filename}"
val fullName = s"$relPathFileName:$name"
val fakeGlobalMethodForFile =
methodNode(
rootNode,
name,
name,
fullName,
None,
relPathFileName,
rootNode
Option(NodeTypes.TYPE_DECL),
Option(fullyQualifiedPackage)
)
)
methodAstParentStack.pop()
rootAst
}

/** Creates an AST of all declarations found in the translation unit - wrapped in a fake method.
*/
private def astInFakeMethod(name: String, fullName: String, path: String, rootNode: ParserNodeInfo): Ast = {

val fakeGlobalTypeDecl =
typeDeclNode(rootNode, name, fullName, relPathFileName, name, NodeTypes.NAMESPACE_BLOCK, fullName)
methodAstParentStack.push(fakeGlobalTypeDecl)
val fakeGlobalMethod =
methodNode(rootNode, name, name, fullName, None, path, Option(NodeTypes.TYPE_DECL), Option(fullName))
methodAstParentStack.push(fakeGlobalMethod)
scope.pushNewScope(fakeGlobalMethod)
val blockNode_ = blockNode(rootNode, Defines.empty, Defines.anyTypeName)

methodAstParentStack.push(fakeGlobalMethodForFile)
scope.pushNewScope(fakeGlobalMethodForFile)
val blockNode_ = blockNode(rootNode, Defines.empty, Defines.anyTypeName)
val methodReturn = methodReturnNode(rootNode, Defines.anyTypeName)
val declsAsts = rootNode.json(ParserKeys.Decls).arr.flatMap(item => astForNode(item)).toList
val ast = Ast(fakeGlobalTypeDecl).withChild(
methodAst(fakeGlobalMethod, Seq.empty, blockAst(blockNode_, declsAsts), methodReturn)
)
methodAstParentStack.pop()
methodAstParentStack.pop()
scope.popScope()
ast
methodAst(fakeGlobalMethodForFile, Seq.empty, blockAst(blockNode_, declsAsts), methodReturn)
}

protected def astForNode(nodeInfo: ParserNodeInfo): Seq[Ast] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,6 @@ import scala.collection.mutable.{ArrayBuffer, ListBuffer}
import scala.util.{Failure, Success, Try}

trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
private def createFunctionTypeAndTypeDecl(
node: ParserNodeInfo,
method: NewMethod,
methodName: String,
methodFullName: String,
signature: String
): Ast = {

val parentNode: NewTypeDecl = methodAstParentStack.collectFirst { case t: NewTypeDecl => t }.getOrElse {
// TODO: Need to add respective Unit test to test this possibility, as looks to me as dead code.
// Replicated it from 'c2cpg' by referring AstForFunctionsCreator.
val astParentType = methodAstParentStack.head.label
val astParentFullName = methodAstParentStack.head.properties(PropertyNames.FULL_NAME).toString
val typeDeclNode_ =
typeDeclNode(node, methodName, methodFullName, method.filename, methodName, astParentType, astParentFullName)
Ast.storeInDiffGraph(Ast(typeDeclNode_), diffGraph)
typeDeclNode_
}

val functionBinding = NewBinding().name(methodName).methodFullName(methodFullName).signature(signature)
Ast(functionBinding).withBindsEdge(parentNode, functionBinding).withRefEdge(functionBinding, method)
}

def astForFuncDecl(funcDecl: ParserNodeInfo): Seq[Ast] = {
val (name, methodFullname, signature, params, receiverInfo, genericTypeMethodMap) = processFuncDecl(funcDecl.json)
Expand All @@ -61,10 +39,10 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
case Some(_, typeFullName, _, _) =>
// if method is related to Struct then fill astParentFullName and astParentType
methodNode_.astParentType(NodeTypes.TYPE_DECL).astParentFullName(typeFullName)
Ast.storeInDiffGraph(astForMethod, diffGraph)
Seq.empty
case _ =>
Seq(astForMethod)
methodNode_.astParentType(NodeTypes.TYPE_DECL).astParentFullName(fullyQualifiedPackage)
Ast.storeInDiffGraph(astForMethod, diffGraph)
Seq.empty
}

private def astForReceiver(receiverInfo: Option[(String, String, String, ParserNodeInfo)]): Seq[Ast] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,57 @@ package io.joern.gosrc2cpg.astcreation
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.ParserAst.{GenDecl, ValueSpec}
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.Ast
import io.joern.gosrc2cpg.utils.UtilityConstants.fileSeparateorPattern
import io.joern.x2cpg.{Ast, ValidationMode}
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder
import io.shiftleft.codepropertygraph.generated.nodes.NewNamespaceBlock
import ujson.{Arr, Obj, Value}

import java.io.File
import scala.util.Try

trait CacheBuilder { this: AstCreator =>
trait CacheBuilder(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

def buildCache(): Unit = {
def buildCache(cpgOpt: Option[Cpg]): DiffGraphBuilder = {
val diffGraph = new DiffGraphBuilder
try {
// Declared package name and namespace ending folder token is not matching then cache the alias to namespace mapping
if (!fullyQualifiedPackage.endsWith(declaredPackageName)) {
GoGlobal.recordAliasToNamespaceMapping(declaredPackageName, fullyQualifiedPackage)
}

cpgOpt.map(_ => {
// We don't want to process this part when third party dependencies are being processed.
val result = GoGlobal.recordAliasToNamespaceMapping(declaredPackageName, fullyQualifiedPackage)
if (result == null) {
// if result is null that means item got added first time otherwise it has been already added to global map
val rootNode = createParserNodeInfo(parserResult.json)
val ast = astForPackage(rootNode)
Ast.storeInDiffGraph(ast, diffGraph)
}
})

findAndProcess(parserResult.json)
processPackageLevelGolbalVaraiblesAndConstants(parserResult.json)
} catch
} catch {
case ex: Exception =>
logger.warn(s"Error: While processing - ${parserResult.fullPath}", ex)
}
diffGraph
}

/** Builds the package-level AST: a namespace block whose only child is a fake TypeDecl, both named after
  * `fullyQualifiedPackage`.
  *
  * The path recorded on both nodes is derived from `relPathFileName`: the path is split with
  * `fileSeparateorPattern`, the last token (presumably the file name - confirm against callers) is dropped, and
  * the remaining tokens are joined with `File.separator` and prefixed with `File.separator`. When there is only a
  * single token, the path is just `File.separator`.
  *
  * @param rootNode
  *   parser node info handed to `typeDeclNode` when creating the fake package TypeDecl
  * @return
  *   an AST rooted at the namespace block with the fake package TypeDecl as its child
  */
private def astForPackage(rootNode: ParserNodeInfo): Ast = {
val pathTokens = relPathFileName.split(fileSeparateorPattern)
val packageFolderPath = if (pathTokens.nonEmpty && pathTokens.size > 1) {
s"${File.separator}${pathTokens.dropRight(1).mkString(File.separator)}"
} else {
s"${File.separator}"
}

val namespaceBlock = NewNamespaceBlock()
.name(fullyQualifiedPackage)
.fullName(fullyQualifiedPackage)
.filename(packageFolderPath)
val fakePackageTypeDecl =
typeDeclNode(rootNode, fullyQualifiedPackage, fullyQualifiedPackage, packageFolderPath, fullyQualifiedPackage)
Ast(namespaceBlock).withChild(Ast(fakePackageTypeDecl))
}

private def processPackageLevelGolbalVaraiblesAndConstants(json: Value): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ object GoGlobal extends Global {
*/
val structTypeMemberTypeMapping: ConcurrentHashMap[String, String] = new ConcurrentHashMap()

def recordAliasToNamespaceMapping(alias: String, namespace: String): Unit = {
def recordAliasToNamespaceMapping(alias: String, namespace: String): String = {
aliasToNameSpaceMapping.putIfAbsent(alias, namespace)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,8 @@ import io.circe.{Decoder, HCursor}
import io.joern.gosrc2cpg.Config
import io.joern.gosrc2cpg.utils.UtilityConstants.fileSeparateorPattern

import java.io.File
import scala.collection.mutable.ListBuffer

class GoModHelper(config: Option[Config] = None, meta: Option[GoMod] = None) {

import java.util.regex.Pattern

def getModMetaData(): Option[GoMod] = meta
def getNameSpace(compilationUnitFilePath: String, pkg: String): String = {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class DownloadDependenciesPass(parentGoMod: GoModHelper) {
Some(config),
astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
new MethodAndTypeCacheBuilderPass(astGenResult.parsedFiles, config, goMod).process()
new MethodAndTypeCacheBuilderPass(None, astGenResult.parsedFiles, config, goMod).process()
}
}
}
Original file line number Diff line number Diff line change
@@ -1,30 +1,40 @@
package io.joern.gosrc2cpg.passes

import io.joern.gosrc2cpg.Config
import io.joern.gosrc2cpg.astcreation.{AstCreator, CacheBuilder}
import io.joern.gosrc2cpg.astcreation.AstCreator
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.parser.GoAstJsonParser
import io.joern.gosrc2cpg.parser.GoAstJsonParser.ParserResult
import io.joern.x2cpg.SourceFiles
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder

import java.nio.file.Paths
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

class MethodAndTypeCacheBuilderPass(astFiles: List[String], config: Config, goMod: GoModHelper) {
class MethodAndTypeCacheBuilderPass(cpgOpt: Option[Cpg], astFiles: List[String], config: Config, goMod: GoModHelper) {
def process(): Seq[AstCreator] = {
val futures = astFiles
.map(file => {
Future {
val parserResult = GoAstJsonParser.readFile(Paths.get(file))
val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath)
val astCreator = new AstCreator(relPathFileName, parserResult, goMod)(config.schemaValidation)
astCreator.buildCache()
astCreator
val diffGraph = astCreator.buildCache(cpgOpt)
(astCreator, diffGraph)
}
})
val allResults: Future[List[AstCreator]] = Future.sequence(futures)
Await.result(allResults, Duration.Inf)
val allResults: Future[List[(AstCreator, DiffGraphBuilder)]] = Future.sequence(futures)
val results = Await.result(allResults, Duration.Inf)
val (astCreators, diffGraphs) = results.unzip
cpgOpt.map(cpg => {
diffGraphs.foreach(diffGraph => {
overflowdb.BatchedUpdate
.applyDiff(cpg.graph, diffGraph, null, null)
.transitiveModifications()
})
})
astCreators
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ class ArraysAndMapTests extends GoCodeToCpgSuite {
)

"check FILE Nodes" in {
cpg.file.size shouldBe 3
cpg.file.size shouldBe 4
}

"check LOCAL node" in {
Expand Down Expand Up @@ -422,7 +422,7 @@ class ArraysAndMapTests extends GoCodeToCpgSuite {
)

"check FILE Nodes" in {
cpg.file.size shouldBe 3
cpg.file.size shouldBe 5
}

"check LOCAL node" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ class FileTests extends GoCodeToCpgSuite {
|""".stripMargin)

"should contain two file nodes in total, both with order=0" in {
cpg.file.order.l shouldBe List(0, 0)
cpg.file.order.l shouldBe List(0, 0, 0)
cpg.file.name(FileTraversal.UNKNOWN).size shouldBe 1
cpg.file.nameNot(FileTraversal.UNKNOWN).size shouldBe 1
cpg.file.nameNot(FileTraversal.UNKNOWN).size shouldBe 2
}

"should contain exactly one placeholder file node with `name=\"<unknown>\"/order=0`" in {
Expand All @@ -33,11 +33,7 @@ class FileTests extends GoCodeToCpgSuite {
}

"should allow traversing from file to its methods via namespace block" in {
cpg.file.nameNot(FileTraversal.UNKNOWN).method.name.toSetMutable shouldBe Set(
s"main.${NamespaceTraversal.globalNamespaceName}",
"foo",
"bar"
)
cpg.file.nameNot(FileTraversal.UNKNOWN).method.name.toSetMutable shouldBe Set("main.Test0.go", "foo", "bar")
}

"should allow traversing from file to its type declarations via namespace block" in {
Expand All @@ -46,15 +42,15 @@ class FileTests extends GoCodeToCpgSuite {
.typeDecl
.name
.l
.sorted shouldBe List("Sample", "main.<global>")
.sorted shouldBe List("Sample", "main")
}

"should allow traversing to namespaces" in {
val List(ns1, ns2) = cpg.file.namespaceBlock.l
ns1.filename shouldBe FileTraversal.UNKNOWN
ns1.fullName shouldBe NamespaceTraversal.globalNamespaceName
ns2.filename shouldBe "Test0.go"
ns2.fullName shouldBe "Test0.go:main"
ns2.filename shouldBe File.separator
ns2.fullName shouldBe "main"
cpg.file.namespace.l.size shouldBe 2
}
}
Expand Down Expand Up @@ -86,22 +82,19 @@ class FileTests extends GoCodeToCpgSuite {
)

"should contain two file nodes in total, both with order=0" in {
cpg.file.order.l shouldBe List(0, 0, 0)
cpg.file.order.l shouldBe List(0, 0, 0, 0, 0)
cpg.file.name(FileTraversal.UNKNOWN).size shouldBe 1
cpg.file.nameNot(FileTraversal.UNKNOWN).size shouldBe 2
cpg.file.nameNot(FileTraversal.UNKNOWN).size shouldBe 4
}

"traversal from file to typedecl should work" in {
cpg.file(".*mainlib.go").size shouldBe 1
cpg.file(".*mainlib.go").typeDecl.fullName.l shouldBe List(
s"${Seq("fpkg", "mainlib.go").mkString(File.separator)}:joern.io/sample/fpkg.<global>",
"joern.io/sample/fpkg.Person"
)
cpg.file(".*fpkg.*").size shouldBe 2
cpg.file(".*fpkg.*").typeDecl.fullName.l shouldBe List("joern.io/sample/fpkg", "joern.io/sample/fpkg.Person")
}

"traversal from file to method should work" in {
cpg.file("main.go").size shouldBe 1
cpg.file("main.go").method.fullName.l shouldBe List("main.go:main.<global>", "main.foo")
cpg.file("main.go").method.fullName.l shouldBe List("main.foo", "main.go:main.main.go")
}
}
}
Loading

0 comments on commit 5f59dab

Please sign in to comment.