Skip to content

Commit

Permalink
SONARPY-2477 Collect data for Databricks notebooks (#2260)
Browse files: browse the repository at this point in the history
  • Loading branch information
ghislainpiot authored Dec 18, 2024
1 parent 5b243a7 commit 10d865d
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ private void processNotebooksFiles(List<PythonInputFile> pythonFiles, SensorCont
PythonScanner scanner = new PythonScanner(context, checks, fileLinesContextFactory, noSonarFilter, PythonParser.createIPythonParser(), pythonIndexer);
scanner.execute(pythonFiles, context);
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.NOTEBOOK_RECOGNITION_ERROR_KEY, scanner.getRecognitionErrorCount());
updateDatabricksTelemetry(scanner);
}

private List<PythonInputFile> parseNotebooks(List<PythonInputFile> pythonFiles, SensorContext context) {
Expand Down Expand Up @@ -135,4 +136,9 @@ private static List<PythonInputFile> getInputFiles(SensorContext context) {
/**
 * Tells whether the given file is a test file, i.e. whether the type of its wrapped
 * {@code InputFile} is {@code InputFile.Type.TEST}.
 *
 * @param inputFile the Python input file to inspect
 * @return {@code true} when the wrapped file's type is {@code TEST}, {@code false} otherwise
 */
private static boolean isErrorOnTestFile(PythonInputFile inputFile) {
  InputFile.Type fileType = inputFile.wrappedFile().type();
  return InputFile.Type.TEST == fileType;
}

/**
 * Stores, under the {@code IPYNB_DATABRICKS_FOUND} telemetry key, the value reported by
 * {@code scanner.getFoundDatabricks()}.
 *
 * @param scanner the scanner whose Databricks flag is recorded
 */
private void updateDatabricksTelemetry(PythonScanner scanner) {
  boolean databricksDetected = scanner.getFoundDatabricks();
  sensorTelemetryStorage.updateMetric(TelemetryMetricKey.IPYNB_DATABRICKS_FOUND, databricksDetected);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.CheckForNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -78,6 +79,8 @@ public class PythonScanner extends Scanner {
private final PythonIndexer indexer;
private final Map<PythonInputFile, Set<PythonCheck>> checksExecutedWithoutParsingByFiles = new HashMap<>();
private int recognitionErrorCount = 0;
// Applied to one line at a time with a full match: optional horizontal whitespace, '#',
// optional horizontal whitespace, then the word MAGIC or COMMAND followed by anything,
// e.g. "# COMMAND ----------" or "# MAGIC %md".
// NOTE(review): any comment that merely starts with one of those words also matches, so this
// looks like a heuristic rather than a strict format check; confirm that is acceptable.
private static final Pattern DATABRICKS_MAGIC_COMMAND_PATTERN = Pattern.compile("^\\h*#\\h*(MAGIC|COMMAND).*");
// Set to true once a scanned file contains a line matching the pattern above; exposed through
// getFoundDatabricks().
private boolean foundDatabricks = false;

public PythonScanner(
SensorContext context, PythonChecks checks,
Expand Down Expand Up @@ -150,6 +153,13 @@ protected void scanFile(PythonInputFile inputFile) throws IOException {
new SymbolVisitor(context.newSymbolTable().onFile(inputFile.wrappedFile())).visitFileInput(visitorContext.rootTree());
new PythonHighlighter(context, inputFile).scanFile(visitorContext);
}

searchForDataBricks(visitorContext);
}

/**
 * Sets {@code foundDatabricks} when at least one line of the file's content fully matches
 * {@code DATABRICKS_MAGIC_COMMAND_PATTERN}. This method only ever turns the flag on, never off.
 *
 * @param visitorContext context giving access to the Python file whose content is inspected
 */
private void searchForDataBricks(PythonVisitorContext visitorContext) {
  if (foundDatabricks) {
    // A previous file already matched: the result cannot change, so skip reading and
    // regex-scanning this file's content (the former `|=` always evaluated its right-hand side).
    return;
  }
  foundDatabricks = visitorContext.pythonFile().content().lines()
    .anyMatch(line -> DATABRICKS_MAGIC_COMMAND_PATTERN.matcher(line).matches());
}

private static PythonTreeMaker getTreeMaker(PythonInputFile inputFile) {
Expand Down Expand Up @@ -407,4 +417,8 @@ private static TextRange rangeFromTextSpan(InputFile file, PythonTextEdit python
/**
 * Returns the current value of this scanner's {@code recognitionErrorCount} counter.
 *
 * @return the recognition error count
 */
public int getRecognitionErrorCount() {
  return this.recognitionErrorCount;
}

/**
 * Returns the current value of this scanner's {@code foundDatabricks} flag.
 *
 * @return the value of {@code foundDatabricks}
 */
public boolean getFoundDatabricks() {
  return this.foundDatabricks;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,15 @@ public void execute(SensorContext context) {
TypeShed.setProjectLevelSymbolTable(pythonIndexer.projectLevelSymbolTable());
PythonScanner scanner = new PythonScanner(context, checks, fileLinesContextFactory, noSonarFilter, PythonParser.create(), pythonIndexer);
scanner.execute(pythonFiles, context);
updateDatabricksTelemetry(scanner);
sensorTelemetryStorage.send(context);
durationReport.stop();
}

/**
 * Stores, under the {@code PYTHON_DATABRICKS_FOUND} telemetry key, the value reported by
 * {@code scanner.getFoundDatabricks()}.
 *
 * @param scanner the scanner whose Databricks flag is recorded
 */
private void updateDatabricksTelemetry(PythonScanner scanner) {
  boolean databricksDetected = scanner.getFoundDatabricks();
  sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_DATABRICKS_FOUND, databricksDetected);
}

private void updatePythonVersionTelemetry(SensorContext context, String[] pythonVersionParameter) {
if (context.runtime().getProduct() == SonarProduct.SONARLINT) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ public void send(SensorContext sensorContext) {
var apiVersion = sensorContext.runtime().getApiVersion();
if (apiVersion.isGreaterThanOrEqual(Version.create(10, 9))) {
data.forEach((k, v) -> {
LOG.info("Collected metric: {}={}", k, v);
LOG.debug("Collected metric: {}={}", k, v);
sensorContext.addTelemetryProperty(k.key(), v);
});

} else {
LOG.info("Skipping sending metrics because the plugin API version is {}", apiVersion);
LOG.debug("Skipping sending metrics because the plugin API version is {}", apiVersion);
}
} catch (Exception e) {
LOG.error("Failed to send metrics", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ public enum TelemetryMetricKey {
NOTEBOOK_RECOGNITION_ERROR_KEY("python.notebook.recognition_error"),
NOTEBOOK_EXCEPTION_KEY("python.notebook.exceptions"),
PYTHON_VERSION_SET_KEY("python.version.set"),
PYTHON_VERSION_KEY("python.version");
PYTHON_VERSION_KEY("python.version"),
PYTHON_DATABRICKS_FOUND("python.notebook.databricks.python"),
IPYNB_DATABRICKS_FOUND("python.notebook.databricks.ipynb");

private final String key;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1438,6 +1438,36 @@ void send_telemetry_no_version() {
verify(contextSpy, times(1)).addTelemetryProperty(TelemetryMetricKey.PYTHON_VERSION_SET_KEY.key(), "0");
}

// Scanning a file made of Databricks "# COMMAND" / "# MAGIC" lines must report the
// PYTHON_DATABRICKS_FOUND telemetry property as "1".
@Test
void detects_databricks() {
  var printStatementRule = new NewActiveRule.Builder()
    .setRuleKey(RuleKey.of(CheckList.REPOSITORY_KEY, "PrintStatementUsage"))
    .setName("Print Statement Usage")
    .build();
  activeRules = new ActiveRulesBuilder().addRule(printStatementRule).build();

  inputFile("databricks.py");
  var contextSpy = spy(context);
  sensor().execute(contextSpy);

  String databricksKey = TelemetryMetricKey.PYTHON_DATABRICKS_FOUND.key();
  verify(contextSpy, times(1)).addTelemetryProperty(databricksKey, "1");
}

// Scanning FILE_1 must report the PYTHON_DATABRICKS_FOUND telemetry property as "0".
@Test
void detects_databricks_negative() {
  var printStatementRule = new NewActiveRule.Builder()
    .setRuleKey(RuleKey.of(CheckList.REPOSITORY_KEY, "PrintStatementUsage"))
    .setName("Print Statement Usage")
    .build();
  activeRules = new ActiveRulesBuilder().addRule(printStatementRule).build();

  inputFile(FILE_1);
  var contextSpy = spy(context);
  sensor().execute(contextSpy);

  String databricksKey = TelemetryMetricKey.PYTHON_DATABRICKS_FOUND.key();
  verify(contextSpy, times(1)).addTelemetryProperty(databricksKey, "0");
}

private com.sonar.sslr.api.Token passToken(URI uri) {
return com.sonar.sslr.api.Token.builder()
.setType(PythonKeyword.PASS)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Databricks notebooks
# COMMAND ----------

# MAGIC %md
# MAGIC ## Alter tables

# COMMAND ----------

0 comments on commit 10d865d

Please sign in to comment.