Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update licenses.json and parsing without xml conversion #94

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import sbt._

object Dependencies {
lazy val library = Seq(
"io.circe" %% "circe-generic" % "0.14.10",
"io.circe" %% "circe-parser" % "0.14.10",
"org.cyclonedx" % "cyclonedx-core-java" % "9.1.0",
"org.scalatest" %% "scalatest" % "3.2.19" % Test,
"org.scalamock" %% "scalamock" % "6.0.0" % Test
Expand Down
9,172 changes: 5,932 additions & 3,240 deletions src/main/resources/licenses.json

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions src/main/resources/licenses.readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
An updated version of the licenses.json file can be found here:
https://github.com/spdx/license-list-data

licenses.xml is generated from original licenses.json using online xml converter:
http://convertjson.com/json-to-xml.htm
https://github.com/spdx/license-list-data/blob/main/json/licenses.json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 much nicer

4,887 changes: 0 additions & 4,887 deletions src/main/resources/licenses.xml

This file was deleted.

2 changes: 1 addition & 1 deletion src/main/scala/com/github/sbt/sbom/licenses/License.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
package com.github.sbt.sbom.licenses

final case class License(id: Option[String] = None, name: Option[String] = None, references: Seq[String] = Seq())
final case class License(id: String, name: String, references: Seq[String])
33 changes: 23 additions & 10 deletions src/main/scala/com/github/sbt/sbom/licenses/LicensesArchive.scala
Original file line number Diff line number Diff line change
@@ -1,23 +1,36 @@
package com.github.sbt.sbom.licenses

import com.github.sbt.sbom.licenses.LicensesArchive.normalizeUrl

import scala.io.Source

class LicensesArchive(licenses: Seq[License]) {
private val licensesByUrl: Map[String, License] = licenses.foldLeft(Map[String, License]()) { (map, license) =>
map ++ license.references.foldLeft(Map[String, License]()) { (map, ref) =>
map + (ref -> license)
}
}
private val licensesByNormalizedUrl: Map[String, License] =
licenses.iterator.flatMap { license =>
license.references.map { reference =>
(normalizeUrl(reference), license)
}
}.toMap

def findByUrl(url: String): Option[License] = licensesByUrl.get(url)
def findByNormalizedUrl(url: String): Option[License] = licensesByNormalizedUrl.get(normalizeUrl(url))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this method? It seems only to be used in tests, or am I misunderstanding that?


def findById(id: String): Option[License] = licenses.find(_.id.contains(id))
}

object LicensesArchive {
private lazy val fileStream = getClass.getResourceAsStream("/licenses.xml")
private lazy val archiveText = Source.fromInputStream(fileStream).mkString
private lazy val archive = new LicensesArchive(new LicensesArchiveParser(archiveText).licenses)
private def normalizeUrl(url: String): String = url.toLowerCase
.replaceFirst("^https://", "http://")
.replaceFirst("\\.html$", "")
.replaceFirst("\\.txt$", "")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this safe? Wouldn't this risk changing the URL in a way that might not resolve anymore?

If we do do this, wouldn't it be nicer to normalize towards using https instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The normalization is only used for comparing URLs. Sadly, the licenses that sbt returns often don't match the URLs from licenses.json. Using these three replacements got me really far in matching most license IDs. Yes, https would probably be better, even if it doesn't really matter that much.


private def loadResourceAsString(resource: String): String = {
val fileStream = getClass.getResourceAsStream(resource)
Source.fromInputStream(fileStream).mkString
}

def fromJsonString(json: String): LicensesArchive =
new LicensesArchive(LicensesArchiveJsonParser.parseString(json))

def findByUrl(url: String): Option[License] = archive.findByUrl(url)
lazy val bundled: LicensesArchive =
fromJsonString(loadResourceAsString("/licenses.json"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.github.sbt.sbom.licenses

import io.circe.Decoder
import io.circe.generic.semiauto.deriveDecoder
import io.circe.parser.*

import scala.util.control.NonFatal

/**
 * Decodes the JSON text of a licenses archive into [[License]] values.
 *
 * Only the top-level `licenses` array is modelled, and for each entry only the
 * `licenseId`, `name` and `seeAlso` fields are declared on the decoding model.
 */
private[licenses] object LicensesArchiveJsonParser {

  /** Decoding model for a single element of the `licenses` array. */
  private case class LicenseJson(
      licenseId: String,
      name: String,
      seeAlso: Seq[String]
  )

  private object LicenseJson {
    implicit val decoder: Decoder[LicenseJson] = deriveDecoder
  }

  /** Decoding model for the archive document itself. */
  private case class LicensesArchiveJson(
      licenses: Seq[LicenseJson]
  )

  private object LicensesArchiveJson {
    implicit val decoder: Decoder[LicensesArchiveJson] = deriveDecoder
  }

  /** Maps a decoded JSON entry onto the domain [[License]]; `seeAlso` becomes `references`. */
  private def toLicense(entry: LicenseJson): License =
    License(id = entry.licenseId, name = entry.name, references = entry.seeAlso)

  /**
   * Parses the given archive JSON text.
   *
   * @param string the JSON document to decode
   * @return one [[License]] per entry of the `licenses` array, in document order
   * @throws RuntimeException if decoding fails; the underlying failure is attached as the cause
   */
  def parseString(string: String): Seq[License] = {
    val archive: LicensesArchiveJson =
      try {
        decode[LicensesArchiveJson](string) match {
          case Right(decoded) => decoded
          // Rethrown inside the try so it is wrapped by the handler below,
          // exactly like any other non-fatal failure raised while decoding.
          case Left(failure) => throw failure
        }
      } catch {
        case NonFatal(cause) =>
          throw new RuntimeException("failed to parse licenses archive json", cause)
      }

    archive.licenses.map(toLicense)
  }
}

This file was deleted.

96 changes: 52 additions & 44 deletions src/test/scala/com/github/sbt/sbom/LicensesArchiveSpec.scala
Original file line number Diff line number Diff line change
@@ -1,83 +1,91 @@
package com.github.sbt.sbom

import com.github.sbt.sbom.licenses.{ LicensesArchive, LicensesArchiveParser }
import com.github.sbt.sbom.licenses.LicensesArchive
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class LicensesArchiveSpec extends AnyWordSpec with Matchers {
"LicensesArchiveParser" should {
"fail parsing a not valid archive" in {
new LicensesArchiveParser("").isValid shouldBe false
assertThrows[RuntimeException] {
LicensesArchive.fromJsonString("")
}
}

"parse a valid archive" in {
new LicensesArchiveParser(xml).isValid shouldBe true
LicensesArchive.fromJsonString(json)
}
}

"LicenseRegister" should {
"find no license by ref" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
register.findByUrl("http://www.domain.com/missingLicense") shouldBe None
val register = LicensesArchive.fromJsonString(json)
register.findByNormalizedUrl("http://www.domain.com/missingLicense") shouldBe None
}

"find licenses by ref" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val gps2 = register.findByUrl("http://www.opensource.org/licenses/GPL-2.0")
val zeroBsd = register.findByUrl("http://landley.net/toybox/license.html")
val register = LicensesArchive.fromJsonString(json)
val gps2 = register.findByNormalizedUrl("https://opensource.org/licenses/GPL-2.0")
val zeroBsd = register.findByNormalizedUrl("http://landley.net/toybox/license.html")

gps2.isDefined shouldBe true
gps2.get.id shouldBe Some("GPL-2.0")
gps2.get.id shouldBe "GPL-2.0-or-later"
zeroBsd.isDefined shouldBe true
zeroBsd.get.id shouldBe Some("0BSD")
zeroBsd.get.id shouldBe "0BSD"
}

"find no licenses by id" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val register = LicensesArchive.fromJsonString(json)
register.findById("an invalid id") shouldBe None
}

"shoud read licenses from resource file" in {
val gpl2OrLater = LicensesArchive.findByUrl("https://opensource.org/licenses/GPL-2.0")
val gpl2OrLater = LicensesArchive.bundled.findByNormalizedUrl("https://opensource.org/licenses/GPL-2.0")
gpl2OrLater.isDefined shouldBe true
gpl2OrLater.get.id shouldBe Some("GPL-2.0")
gpl2OrLater.get.id shouldBe "GPL-2.0-or-later"
}

"find licenses by id" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val gpl2 = register.findById("GPL-2.0")
val register = LicensesArchive.fromJsonString(json)
val gpl2 = register.findById("GPL-2.0-or-later")
gpl2.isDefined shouldBe true
gpl2.get.id shouldBe Some("GPL-2.0")
gpl2.get.id shouldBe "GPL-2.0-or-later"
}
}

val xml: String =
"""
|<root>
| <licenseListVersion>v3.4-5-gb3d735f</licenseListVersion>
| <licenses>
| <reference>./0BSD.html</reference>
| <isDeprecatedLicenseId>false</isDeprecatedLicenseId>
| <detailsUrl>http://spdx.org/licenses/0BSD.json</detailsUrl>
| <referenceNumber>310</referenceNumber>
| <name>BSD Zero Clause License</name>
| <licenseId>0BSD</licenseId>
| <seeAlso>http://landley.net/toybox/license.html</seeAlso>
| <isOsiApproved>true</isOsiApproved>
| </licenses>
| <licenses>
| <reference>./GPL-2.0.html</reference>
| <isDeprecatedLicenseId>true</isDeprecatedLicenseId>
| <isFsfLibre>true</isFsfLibre>
| <detailsUrl>http://spdx.org/licenses/GPL-2.0.json</detailsUrl>
| <referenceNumber>140</referenceNumber>
| <name>GNU General Public License v2.0 only</name>
| <licenseId>GPL-2.0</licenseId>
| <seeAlso>http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html</seeAlso>
| <seeAlso>http://www.opensource.org/licenses/GPL-2.0</seeAlso>
| <isOsiApproved>true</isOsiApproved>
| </licenses>
| <releaseDate>2019-01-16</releaseDate>
|</root>
lazy val json: String =
"""{
| "licenseListVersion": "b5a3b2e",
| "licenses": [
| {
| "reference": "https://spdx.org/licenses/0BSD.html",
| "isDeprecatedLicenseId": false,
| "detailsUrl": "https://spdx.org/licenses/0BSD.json",
| "referenceNumber": 430,
| "name": "BSD Zero Clause License",
| "licenseId": "0BSD",
| "seeAlso": [
| "http://landley.net/toybox/license.html",
| "https://opensource.org/licenses/0BSD"
| ],
| "isOsiApproved": true
| },
| {
| "reference": "https://spdx.org/licenses/GPL-2.0-or-later.html",
| "isDeprecatedLicenseId": false,
| "detailsUrl": "https://spdx.org/licenses/GPL-2.0-or-later.json",
| "referenceNumber": 629,
| "name": "GNU General Public License v2.0 or later",
| "licenseId": "GPL-2.0-or-later",
| "seeAlso": [
| "https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
| "https://opensource.org/licenses/GPL-2.0"
| ],
| "isOsiApproved": true,
| "isFsfLibre": true
| }
| ],
| "releaseDate": "2024-06-28"
|}
""".stripMargin
}
Loading