diff --git a/core/pom.xml b/core/pom.xml index ae6e8d6f..1a2b0781 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -61,6 +61,10 @@ org.springframework.boot spring-boot-starter-data-jpa + + org.springframework.boot + spring-boot-starter-webflux + org.hsqldb hsqldb diff --git a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java index 0b4449f2..8927aaf8 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java @@ -9,11 +9,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.http.MediaType; +import org.springframework.web.reactive.function.client.WebClient; import com.devonfw.tools.solicitor.SolicitorVersion; import com.devonfw.tools.solicitor.common.content.ContentProvider; import com.devonfw.tools.solicitor.common.content.web.WebContent; +import reactor.core.publisher.Mono; + /** * A {@link LicenseUrlGuesser} which tries to strategically find a possible better license URL. */ @@ -24,7 +29,13 @@ public class StrategyLicenseUrlGuesser implements LicenseUrlGuesser { private ContentProvider webContentProvider; private SolicitorVersion solicitorVersion; + + @Value("${solicitor.githubtoken}") + private String token; + + private WebClient client = WebClient.create("https://api.github.com"); + /** * The constructor. 
* @@ -56,14 +67,19 @@ private void setTrace(String trace, StringBuilder traceBuilder) { traceBuilder.append(trace).append('\n'); } - // https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.stat - // https://raw.githubusercontent.com/nodelib/nodelib/master/packages/fs/fs.stat/README.md // helper method that normalizes a github url and retrieves the raw link to // a given license private String normalizeGitURL(String url, StringBuilder traceBuilder) { - String oldURL = url; - if (url.contains("github")) { + // case that github remote repository link is given + if (url.contains("github.com") && url.endsWith(".git")) { + url = githubAPILicenseUrl(url, token); + if(!url.equals(oldURL) && !url.contains("api.github.com")) { + setTrace("URL changed from " + oldURL + " to " + url, traceBuilder); + return url; + } + } + if (url.contains("github.com")) { // use https for all github URLs url = url.replace("http:", "https:"); // omit repo suffix if existent @@ -154,4 +170,48 @@ public GuessedLicenseUrlContent getContentForUri(String uri) { return new GuessedLicenseUrlContent(guessedUrl, auditLogBuilder.toString()); } + //tries to get github license file location based of vsc-link + public String githubAPILicenseUrl(String link, String token) { + + String fallbackLink = link; + + String result = ""; + if (link.contains("github.com")) { + if (link.endsWith(".git")) { + link = link.substring(0, link.length() - 4); + } + link = link.replace("git://", ""); + link = link.replace("ssh://", ""); + link = link.replace("git@", ""); + link = link.replace("https://", ""); + link = link.replace("api.github.com/", ""); + link = link.replace("github.com/", ""); + + //TODO it should be better to parse the response directly into a JSON object, not string + result = client.get() + .uri("/repos/" + link + "/license") + .header("Accept", "application/vnd.github+json") + .header("Authorization", "Bearer " + token) + .accept(MediaType.APPLICATION_JSON) + .retrieve() + .onStatus(status -> 
status.isError(), + response -> Mono.empty()) + .bodyToMono(String.class) + .block(); //TODO this blocks the thread probably + + if (result.contains("download_url")) { + result = result.substring(result.indexOf("\"download_url\":")); + result = result.substring(16,result.indexOf(",")-1); + } + if (result.contains("\"message\":\"Moved Permanently\"")) { + String tempLink = result.substring(result.indexOf("\"url\":")); + tempLink = tempLink.substring(7,result.indexOf(",")-1); + result = githubAPILicenseUrl(tempLink, token); + } + if (result.contains("\"message\":\"Not Found\"")) { + result = fallbackLink; + } + } + return result; + } } diff --git a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java index db15b3a3..b51f7e30 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java @@ -91,11 +91,11 @@ public void readInventory(String type, String sourceUrl, Application application List lic = (List) singlePackage.get("declared_licenses"); if (lic.isEmpty()) { // add empty raw license if no license info attached - addRawLicense(appComponent, null, null, sourceUrl); + addRawLicense(appComponent, null, repo, sourceUrl); } else { for (Object cl : lic) { licenseCount++; - addRawLicense(appComponent, cl.toString(), null, sourceUrl); + addRawLicense(appComponent, cl.toString(), repo, sourceUrl); } } doLogging(sourceUrl, application, componentCount, licenseCount); diff --git a/core/src/main/resources/application.properties b/core/src/main/resources/application.properties index 8d56c0fb..217af3a4 100644 --- a/core/src/main/resources/application.properties +++ b/core/src/main/resources/application.properties @@ -41,6 +41,9 @@ solicitor.classpath-guessedlicenseurl-cache-locations=licenseurls # Deprecated features are deactivated by default. 
If set to true they might be (temporarily) activated. solicitor.deprecated-features-allowed=false +# input for personal github api token to increase ORT-reader rate limit +solicitor.githubtoken= + ## Feature flags for activation of non-standard/experimental functionality # Incorporate scancode infos into model solicitor.feature-flag.scancode=false diff --git a/core/src/test/resources/analyzer-result.json b/core/src/test/resources/analyzer-result.json index 22a7f571..0098293f 100644 --- a/core/src/test/resources/analyzer-result.json +++ b/core/src/test/resources/analyzer-result.json @@ -49,7 +49,7 @@ }, "vcs_processed" : { "type" : "", - "url" : "", + "url" : "ssh://git@github.com/hamcrest/JavaHamcrest.git", "revision" : "", "path" : "" }, @@ -92,7 +92,7 @@ }, "vcs_processed" : { "type" : "Git", - "url" : "https://github.com/testproject.git", + "url" : "ssh://git@github.com/hamcrest/JavaHamcrest.git", "revision" : "", "path" : "testproject" } diff --git a/documentation/master-solicitor.asciidoc b/documentation/master-solicitor.asciidoc index 9c52320c..cdbb939b 100644 --- a/documentation/master-solicitor.asciidoc +++ b/documentation/master-solicitor.asciidoc @@ -321,7 +321,7 @@ The leading section of the config file defines some metadata and the engagement <9> does the customer provide the OSS? (boolean) ==== Applications -Within this section the different applications (=deliverables) of the engagement are defined. Furthermore, for each application at least one reader needs to be defined which imports the component and license information. +Within this section the different applications (= deliverables) of the engagement are defined. Furthermore, for each application at least one reader needs to be defined which imports the component and license information. [listing] "applications" : [ { @@ -602,7 +602,7 @@ These configurations may also be used to overwrite options of a https://commons. 
Important: In case that a component has multiple licenses attached, there needs to be a separate line in the csv file for each license. -WARNING: The CSV reader currently does not fill the attribute `packageUrl`. Any functionality/reporting based on this attribute will be disfunctional for data read by the CSV reader. +WARNING: The CSV reader currently does not fill the attribute `packageUrl`. Any functionality/reporting based on this attribute will be dysfunctional for data read by the CSV reader. === NPM @@ -786,7 +786,13 @@ In _Solicitor_ the data is read with the following part of the config } ] ---- -WARNING: The ORT reader currently does not yet fill the attribute `licenseUrl`. Any functionality/reporting based on this attribute will be disfunctional for data read by the ORT reader. +It is important to note that a personal GitHub token is required when executing _Solicitor_ as it is needed to fill the LicenseURL data via GitHub API requests. You can generate tokens for your account https://github.com/settings/tokens[here]. This token increases the rate limit of the GitHub API from 60 to 5000 requests per hour. +The token can be provided via _Solicitor_ properties like this: + +---- +java -Dsolicitor.githubtoken=token -jar solicitor.jar -c file:solicitor.cfg +---- + === Gradle (Windows) @@ -1089,7 +1095,7 @@ groups to be processed in sequence. When using the builtin default base configur To use your own rule data there are three approaches: * Include your own `rules` section in the project configuration file (so not inheriting from the builtin base configuration file) and reference your own decision tables there. -* Create your own "Solicitor Extension" which might completely redefine/replace the builtin `Solicitor` setup including all decision tables and the base configuration file. See <> for details. +* Create your own "Solicitor Extension" which might completely redefine/replace the built-in `Solicitor` setup including all decision tables and the base configuration file. 
See <> for details. * Make use of the optional project specific decision tables which are defined in the default base configuration: For every builtin decision table there is an optional external decision table (expected in the filesystem) which will be checked for existence. If such an external decision table exists it will be processed first - before processing the builtin decision table. Thus it is possible to amend / override the builtin rules by project specific rules. When you create the starter configuration of your project as described in <>, those project specific decision tables are automatically created. == Reporting and Creating output documents @@ -1237,6 +1243,7 @@ In general it is possible to manually correct this by editing the downloaded and Currently license URL guessing is based solely on the URL given in `NormalizedLicense.effectiveNormalizedLicenseUrl`. It will try the following approaches: +* If the original URL is a remote Github repository URL (with a .git ending) then a Github API call is attempted to retrieve the correct license URL. * If the original URL is a Github-URL and matches patterns which are known to return HTML-formatted content then the URL is rewritten to point to a raw version of the content. * If the original URL points to a Github project page (not to a file), then the algorithm will try different typical locations (e.g. looking for file `LICENSE`). If found it will return this URL as result. * If no "better" URL could be guessed it will return the original URL. @@ -1555,6 +1562,7 @@ Changes in 1.6.0:: * https://github.com/devonfw/solicitor/issues/146: Fixed the bug which prevented already defined velocity macro with same name to be redefined in different template. * https://github.com/devonfw/solicitor/issues/135: Introduce `sourceRepoUrl` as new property in `ApplicationComponent`. Depending on the kind of Reader either `ossHomepage` and/or `sourceRepoUrl` will be filled with data. 
* https://github.com/devonfw/solicitor/issues/149: Added name mappings so that for all SPDX-IDs used in the name mapping the SPDX-ID itself is also recognized and formally mapped. +* Improved ORT-Reader with LicenseURL extraction via Github-API. See <>. Changes in 1.5.0:: * https://github.com/devonfw/solicitor/issues/6: Fixed the bug by allowing multiple `NormalizedLicense` entries with same id per `ApplicationComponent` if the declared license differs. This allows to assign multiple licenses of same type (e.g. MIT) to a component and also will allow multiple "UNKNOWN" licenses to be reported for the same component. Note that as a side effect additional and unexpected `NormalizedLicense` entries might now be created. This might be caused from multiple `LicenseAssignment*.xls` rules firing for different `RawLicense` entries in the same `ApplicationComponent` and resulting in identical `NormalizedLicense` id. In this case it is necessary to restrict those different rules to only fire for specific `RawLicense` entries.