-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
update licenses.json and parsing without xml conversion #94
Changes from 2 commits
0bc98f4
9e844f0
0997436
6ec00f0
e2e28e1
b638793
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,2 @@ | ||
Updated version of licenses.json file may be found here: | ||
https://github.com/spdx/license-list-data | ||
|
||
licenses.xml is generated from original licenses.json using online xml converter: | ||
http://convertjson.com/json-to-xml.htm | ||
https://github.com/spdx/license-list-data/blob/main/json/licenses.json | ||
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
final case class License(id: Option[String] = None, name: Option[String] = None, references: Seq[String] = Seq()) | ||
final case class License(id: String, name: String, references: Seq[String]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,36 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
import com.github.sbt.sbom.licenses.LicensesArchive.normalizeUrl | ||
|
||
import scala.io.Source | ||
|
||
class LicensesArchive(licenses: Seq[License]) { | ||
private val licensesByUrl: Map[String, License] = licenses.foldLeft(Map[String, License]()) { (map, license) => | ||
map ++ license.references.foldLeft(Map[String, License]()) { (map, ref) => | ||
map + (ref -> license) | ||
} | ||
} | ||
private val licensesByNormalizedUrl: Map[String, License] = | ||
licenses.iterator.flatMap { license => | ||
license.references.map { reference => | ||
(normalizeUrl(reference), license) | ||
} | ||
}.toMap | ||
|
||
def findByUrl(url: String): Option[License] = licensesByUrl.get(url) | ||
def findByNormalizedUrl(url: String): Option[License] = licensesByNormalizedUrl.get(normalizeUrl(url)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need this method? It seems to be used only in tests, or am I misunderstanding that? |
||
|
||
def findById(id: String): Option[License] = licenses.find(_.id.contains(id)) | ||
} | ||
|
||
object LicensesArchive { | ||
private lazy val fileStream = getClass.getResourceAsStream("/licenses.xml") | ||
private lazy val archiveText = Source.fromInputStream(fileStream).mkString | ||
private lazy val archive = new LicensesArchive(new LicensesArchiveParser(archiveText).licenses) | ||
private def normalizeUrl(url: String): String = url.toLowerCase | ||
.replaceFirst("^https://", "http://") | ||
.replaceFirst("\\.html$", "") | ||
.replaceFirst("\\.txt$", "") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this safe? Wouldn't this risk changing the URL in a way that might not resolve anymore? If we do do this, wouldn't it be nicer to normalize towards using https instead? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The normalization is really only used for comparing URLs. Sadly, the license URLs that sbt returns often don't match the URLs from licenses.json. Using these three replacements got me really far in matching most license ids. Yes, https would probably be better, even if it doesn't really matter that much. |
||
|
||
private def loadResourceAsString(resource: String): String = { | ||
val fileStream = getClass.getResourceAsStream(resource) | ||
Source.fromInputStream(fileStream).mkString | ||
} | ||
|
||
def fromJsonString(json: String): LicensesArchive = | ||
new LicensesArchive(LicensesArchiveJsonParser.parseString(json)) | ||
|
||
def findByUrl(url: String): Option[License] = archive.findByUrl(url) | ||
lazy val bundled: LicensesArchive = | ||
fromJsonString(loadResourceAsString("/licenses.json")) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
import io.circe.Decoder | ||
import io.circe.generic.semiauto.deriveDecoder | ||
import io.circe.parser.* | ||
|
||
import scala.util.control.NonFatal | ||
|
||
private[licenses] object LicensesArchiveJsonParser { | ||
private case class LicenseJson( | ||
licenseId: String, | ||
name: String, | ||
seeAlso: Seq[String] | ||
) | ||
|
||
private object LicenseJson { | ||
implicit val decoder: Decoder[LicenseJson] = deriveDecoder | ||
} | ||
|
||
private case class LicensesArchiveJson( | ||
licenses: Seq[LicenseJson] | ||
) | ||
|
||
private object LicensesArchiveJson { | ||
implicit val decoder: Decoder[LicensesArchiveJson] = deriveDecoder | ||
} | ||
|
||
private def licenseFromLicenseEntry(licenseEntry: LicenseJson): License = License( | ||
id = licenseEntry.licenseId, | ||
name = licenseEntry.name, | ||
references = licenseEntry.seeAlso | ||
) | ||
|
||
def parseString(string: String): Seq[License] = { | ||
val licensesArchiveJson = | ||
try { | ||
decode[LicensesArchiveJson](string).toTry.get | ||
} catch { | ||
case NonFatal(e) => throw new RuntimeException("failed to parse licenses archive json", e) | ||
} | ||
|
||
licensesArchiveJson.licenses.map(licenseFromLicenseEntry) | ||
} | ||
} |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,83 +1,91 @@ | ||
package com.github.sbt.sbom | ||
|
||
import com.github.sbt.sbom.licenses.{ LicensesArchive, LicensesArchiveParser } | ||
import com.github.sbt.sbom.licenses.LicensesArchive | ||
import org.scalatest.matchers.should.Matchers | ||
import org.scalatest.wordspec.AnyWordSpec | ||
|
||
class LicensesArchiveSpec extends AnyWordSpec with Matchers { | ||
"LicensesArchiveParser" should { | ||
"fail parsing a not valid archive" in { | ||
new LicensesArchiveParser("").isValid shouldBe false | ||
assertThrows[RuntimeException] { | ||
LicensesArchive.fromJsonString("") | ||
} | ||
} | ||
|
||
"parse a valid archive" in { | ||
new LicensesArchiveParser(xml).isValid shouldBe true | ||
LicensesArchive.fromJsonString(json) | ||
} | ||
} | ||
|
||
"LicenseRegister" should { | ||
"find no license by ref" in { | ||
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses) | ||
register.findByUrl("http://www.domain.com/missingLicense") shouldBe None | ||
val register = LicensesArchive.fromJsonString(json) | ||
register.findByNormalizedUrl("http://www.domain.com/missingLicense") shouldBe None | ||
} | ||
|
||
"find licenses by ref" in { | ||
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses) | ||
val gps2 = register.findByUrl("http://www.opensource.org/licenses/GPL-2.0") | ||
val zeroBsd = register.findByUrl("http://landley.net/toybox/license.html") | ||
val register = LicensesArchive.fromJsonString(json) | ||
val gps2 = register.findByNormalizedUrl("https://opensource.org/licenses/GPL-2.0") | ||
val zeroBsd = register.findByNormalizedUrl("http://landley.net/toybox/license.html") | ||
|
||
gps2.isDefined shouldBe true | ||
gps2.get.id shouldBe Some("GPL-2.0") | ||
gps2.get.id shouldBe "GPL-2.0-or-later" | ||
zeroBsd.isDefined shouldBe true | ||
zeroBsd.get.id shouldBe Some("0BSD") | ||
zeroBsd.get.id shouldBe "0BSD" | ||
} | ||
|
||
"find no licenses by id" in { | ||
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses) | ||
val register = LicensesArchive.fromJsonString(json) | ||
register.findById("an invalid id") shouldBe None | ||
} | ||
|
||
"shoud read licenses from resource file" in { | ||
val gpl2OrLater = LicensesArchive.findByUrl("https://opensource.org/licenses/GPL-2.0") | ||
val gpl2OrLater = LicensesArchive.bundled.findByNormalizedUrl("https://opensource.org/licenses/GPL-2.0") | ||
gpl2OrLater.isDefined shouldBe true | ||
gpl2OrLater.get.id shouldBe Some("GPL-2.0") | ||
gpl2OrLater.get.id shouldBe "GPL-2.0-or-later" | ||
} | ||
|
||
"find licenses by id" in { | ||
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses) | ||
val gpl2 = register.findById("GPL-2.0") | ||
val register = LicensesArchive.fromJsonString(json) | ||
val gpl2 = register.findById("GPL-2.0-or-later") | ||
gpl2.isDefined shouldBe true | ||
gpl2.get.id shouldBe Some("GPL-2.0") | ||
gpl2.get.id shouldBe "GPL-2.0-or-later" | ||
} | ||
} | ||
|
||
val xml: String = | ||
""" | ||
|<root> | ||
| <licenseListVersion>v3.4-5-gb3d735f</licenseListVersion> | ||
| <licenses> | ||
| <reference>./0BSD.html</reference> | ||
| <isDeprecatedLicenseId>false</isDeprecatedLicenseId> | ||
| <detailsUrl>http://spdx.org/licenses/0BSD.json</detailsUrl> | ||
| <referenceNumber>310</referenceNumber> | ||
| <name>BSD Zero Clause License</name> | ||
| <licenseId>0BSD</licenseId> | ||
| <seeAlso>http://landley.net/toybox/license.html</seeAlso> | ||
| <isOsiApproved>true</isOsiApproved> | ||
| </licenses> | ||
| <licenses> | ||
| <reference>./GPL-2.0.html</reference> | ||
| <isDeprecatedLicenseId>true</isDeprecatedLicenseId> | ||
| <isFsfLibre>true</isFsfLibre> | ||
| <detailsUrl>http://spdx.org/licenses/GPL-2.0.json</detailsUrl> | ||
| <referenceNumber>140</referenceNumber> | ||
| <name>GNU General Public License v2.0 only</name> | ||
| <licenseId>GPL-2.0</licenseId> | ||
| <seeAlso>http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html</seeAlso> | ||
| <seeAlso>http://www.opensource.org/licenses/GPL-2.0</seeAlso> | ||
| <isOsiApproved>true</isOsiApproved> | ||
| </licenses> | ||
| <releaseDate>2019-01-16</releaseDate> | ||
|</root> | ||
lazy val json: String = | ||
"""{ | ||
| "licenseListVersion": "b5a3b2e", | ||
| "licenses": [ | ||
| { | ||
| "reference": "https://spdx.org/licenses/0BSD.html", | ||
| "isDeprecatedLicenseId": false, | ||
| "detailsUrl": "https://spdx.org/licenses/0BSD.json", | ||
| "referenceNumber": 430, | ||
| "name": "BSD Zero Clause License", | ||
| "licenseId": "0BSD", | ||
| "seeAlso": [ | ||
| "http://landley.net/toybox/license.html", | ||
| "https://opensource.org/licenses/0BSD" | ||
| ], | ||
| "isOsiApproved": true | ||
| }, | ||
| { | ||
| "reference": "https://spdx.org/licenses/GPL-2.0-or-later.html", | ||
| "isDeprecatedLicenseId": false, | ||
| "detailsUrl": "https://spdx.org/licenses/GPL-2.0-or-later.json", | ||
| "referenceNumber": 629, | ||
| "name": "GNU General Public License v2.0 or later", | ||
| "licenseId": "GPL-2.0-or-later", | ||
| "seeAlso": [ | ||
| "https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html", | ||
| "https://opensource.org/licenses/GPL-2.0" | ||
| ], | ||
| "isOsiApproved": true, | ||
| "isFsfLibre": true | ||
| } | ||
| ], | ||
| "releaseDate": "2024-06-28" | ||
|} | ||
""".stripMargin | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 much nicer