I can run DQD using the connection parameters shown below, but I get an SSL error if I try to use connect(), and I get the same error when trying to use the Synthea ETL tool.
Full script is below.
Any help would be greatly appreciated.
Connection Parameters
# Connection parameters that reproduce the SSL/PKIX error shown below.
dbms <- "sql server"
user <- "synthea_user"
password <- "sneeker"
server <- "localhost"
port <- "1433"
# NOTE(review): self-assignment — assumes pathToDriver was defined earlier in the session; verify.
pathToDriver <- pathToDriver
# NOTE(review): integratedSecurity=true appears to be the trigger of the PKIX
# "unable to find valid certification path" failure — the working settings further
# down use integratedSecurity=false with trustServerCertificate=true.
extraSettings <- ";databaseName=master;integratedSecurity=true;encrypt=false"
Error
Error in rJava::.jcall(jdbcDriver, "Ljava/sql/Connection;", "connect", :
com.microsoft.sqlserver.jdbc.SQLServerException: The driver could not establish a secure connection to SQL Server by using Secure Sockets Layer (SSL) encryption. Error: "PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target". ClientConnectionId:1799f03c-5ddf-4498-9670-6fcf87f557ac
Full Script
# ---
# init-parameters.r
# Defines all configuration used by the DQD run script below.
# ---

# local files
pathToDriver <- "D:\\NACHC\\SYNTHEA\\DQD\\resources\\jar\\sqlserver-jar" # location of the mssql-jdbc-10.2.0.jre8.jar
outputFolder <- "D:\\NACHC\\SYNTHEA\\DQD\\output" # location where output file will be written
outputFile <- "results.json" # file for results json

# database connectivity
dbms <- "sql server"
user <- "synthea_user"
password <- "sneeker"
server <- "localhost"
port <- "1433"
# NOTE(review): integratedSecurity=true is the likely cause of the reported PKIX/SSL
# failure; the working configuration later in this file uses integratedSecurity=false
# together with trustServerCertificate=true — confirm which applies to your server.
extraSettings <- ";databaseName=master;integratedSecurity=true;encrypt=false"

# database schemas
cdmDatabaseSchema <- "synthea_micro.dbo" # your omop instance
resultsDatabaseSchema <- "synthea_micro_dqd_results.dbo" # instance where results will be written
cdmSourceName <- "SYNTHEA_MICRO_Test_Database" # a human readable name for your CDM source

# config parameters
numThreads <- 1 # number of threads to run, 3 seems to work well on Redshift
sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries
verboseMode <- TRUE # set to TRUE if you want to see activity written to the console
writeToTable <- TRUE # set to FALSE if you want to skip writing to a SQL table in the results schema

# dqd parameters
checkLevels <- c("TABLE", "FIELD", "CONCEPT") # which DQ check levels to run
checkNames <- c() # which DQ checks to run, names can be found in inst/csv/OMOP_CDM_v5.3.1_Check_Descriptions.csv
tablesToExclude <- c() # which CDM tables to exclude?
# ---
#
# run-dqd.r
#
# Run the init-parameters.r script before running this script.
#
# Script to run the Data Quality Dashboard (DQD)
#
# The output of this script is a single JSON file that can then be rendered (e.g. with
# the DQD results viewer) to view the results as a web page in a browser.
#
# Prior to running this script you will need to download and install the jdbc driver For MS Sql Server.
# This is entered as the pathToDriver variable below.
#
# ---
# ---
#
# CREATE THE CONNECTION OBJECT AND RUN THE JOB
#
# ---
# Create the DatabaseConnector connection-details object from the parameters
# defined in init-parameters.r above. Note the object is named
# `connectionDetails`, NOT `cd` — keep that in mind for any connect() calls.
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = dbms,
user = user,
password = password,
server = server,
port = port,
pathToDriver = pathToDriver,
extraSettings = extraSettings
)
# (OPTIONAL) quick connectivity test — note the correct object name:
# conn <- DatabaseConnector::connect(connectionDetails)
# DatabaseConnector::disconnect(conn)
# (OPTIONAL): if writing to table and using Redshift, bulk loading can be initialized
# Sys.setenv (
# "AWS_ACCESS_KEY_ID" = "",
# "AWS_SECRET_ACCESS_KEY" = "",
# "AWS_DEFAULT_REGION" = "",
# "AWS_BUCKET_NAME" = "",
# "AWS_OBJECT_KEY" = "",
# "AWS_SSE_TYPE" = "AES256",
# "USE_MPP_BULK_LOAD" = TRUE
# )
# ---
# Run the DQD job: executes the selected data-quality checks against the CDM
# and writes a JSON results file to file.path(outputFolder, outputFile).
# All argument values come from the parameter section defined earlier.
# ---
DataQualityDashboard::executeDqChecks(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = cdmDatabaseSchema,
  resultsDatabaseSchema = resultsDatabaseSchema,
  cdmSourceName = cdmSourceName,
  cdmVersion = "5.4",
  numThreads = numThreads,
  sqlOnly = sqlOnly,
  verboseMode = verboseMode,
  writeToTable = writeToTable,
  checkLevels = checkLevels,
  checkNames = checkNames,
  tablesToExclude = tablesToExclude,
  outputFolder = outputFolder,
  outputFile = outputFile
)
# (OPTIONAL) inspect logs
# ParallelLogger::launchLogViewer(
# logFileName = file.path(outputFolder, cdmSourceName,
# sprintf("log_DqDashboard_%s.txt", cdmSourceName))
# )
# (OPTIONAL) if you want to write the JSON file to the results table separately
# jsonFilePath <- ""
# DataQualityDashboard::writeJsonResultsToTable(
# connectionDetails = connectionDetails,
# resultsDatabaseSchema = resultsDatabaseSchema,
# jsonFilePath = jsonFilePath
# )
# Connectivity test. BUG FIX: the connection-details object created above is named
# `connectionDetails`, not `cd` — connect(cd) fails with "object 'cd' not found".
# Calls are namespace-qualified because DatabaseConnector is never attached with library().
print("TRYING TO CONNECT USING connect(connectionDetails)")
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::disconnect(conn)
These are the connection settings that were needed to get this to work.
extraSettings <- ";databaseName=synthea_etl;integratedSecurity=false;encrypt=false;trustServerCertificate=true;sslProtocol=TLSv1"
This is the full script.
# One-time setup: install the ETL-Synthea package from GitHub (run once per machine),
# then attach it for this session.
devtools::install_github("OHDSI/ETL-Synthea", INSTALL_opts = "--no-multiarch")
library(ETLSyntheaBuilder)
# database connectivity
dbms <- "sql server"
user <- "synthea_etl"
password <- "Sneaker01"
server <- "localhost"
port <- "1433"
pathToDriver <- "D:\\NACHC\\SYNTHEA\\DQD\\resources\\jar\\sqlserver-jar"
# Working settings: integratedSecurity=false with trustServerCertificate=true avoids
# the PKIX "unable to find valid certification path" SSL failure seen earlier.
extraSettings <- ";databaseName=synthea_etl;integratedSecurity=false;encrypt=false;trustServerCertificate=true;sslProtocol=TLSv1"

# Build the connection-details object used by every ETLSyntheaBuilder call below.
cd <- DatabaseConnector::createConnectionDetails(
dbms = dbms,
user = user,
password = password,
server = server,
port = port,
pathToDriver = pathToDriver,
extraSettings = extraSettings
)

# test the connection — namespace-qualified because only ETLSyntheaBuilder is
# attached with library(); a bare connect() may not be found on the search path.
conn <- DatabaseConnector::connect(cd)
DatabaseConnector::disconnect(conn)
# ETL configuration.
cdmSchema <- "synthea_etl.dbo" # target schema for the OMOP CDM tables
cdmVersion <- "5.4" # OMOP CDM version to build
syntheaVersion <- "2.7.0" # version of Synthea that produced the CSV files
syntheaSchema <- "synthea_etl.dbo" # staging schema for the raw Synthea tables
syntheaFileLoc <- "C:\\Users\\gresh\\Downloads\\synthea-etl\\synthea_sample_data_csv_apr2020\\csv" # folder of Synthea CSV output
vocabFileLoc <- "C:\\fhir-to-omop\\terminology\\vocabulary_download_v5_{8c94604f-71b9-47ea-aef2-04be2a7d52b1}_1646823567698" # Athena vocabulary download folder
# Create staging tables and load the Synthea CSVs into them.
# (CreateCDMTables / LoadVocabFromCsv are left commented, as in the original run.)
# ETLSyntheaBuilder::CreateCDMTables(connectionDetails = cd, cdmSchema = cdmSchema, cdmVersion = cdmVersion)
ETLSyntheaBuilder::CreateSyntheaTables(connectionDetails = cd, syntheaSchema = syntheaSchema, syntheaVersion = syntheaVersion)
ETLSyntheaBuilder::LoadSyntheaTables(connectionDetails = cd, syntheaSchema = syntheaSchema, syntheaFileLoc = syntheaFileLoc)
# ETLSyntheaBuilder::LoadVocabFromCsv(connectionDetails = cd, cdmSchema = cdmSchema, vocabFileLoc = vocabFileLoc)

# Clear provider/person before loading event tables.
# FIX: connect/disconnect/dbExecute are namespace-qualified — neither DatabaseConnector
# nor DBI is attached with library() here, so the bare names may not resolve.
# paste(..., sep = "") replaced with the equivalent paste0().
conn <- DatabaseConnector::connect(cd)
DBI::dbExecute(conn, paste0("delete from ", cdmSchema, ".provider"))
DBI::dbExecute(conn, paste0("delete from ", cdmSchema, ".person"))
DatabaseConnector::disconnect(conn)

ETLSyntheaBuilder::LoadEventTables(connectionDetails = cd, cdmSchema = cdmSchema, syntheaSchema = syntheaSchema, cdmVersion = cdmVersion, syntheaVersion = syntheaVersion)