My code connects to SQL Server using PySpark. For that connection, the password is stored encrypted in a JCEKS keystore. How can I decrypt that password and use it to load tables from SQL Server? Please suggest.
import pyspark
import re
from pyspark_llap import HiveWarehouseSession
from pyspark.sql.functions import struct
from pyspark.sql.functions import *
from pyspark.sql.session import SparkSession
# Obtain the SparkSession for this application, then build the
# HiveWarehouseSession on top of it.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL data source example")
    .getOrCreate()
)
hive = HiveWarehouseSession.session(spark).build()
# Load the SQL Server table over JDBC into a DataFrame.
# NOTE: the literal string "password_alias" is what is passed as the
# password option here; nothing in this snippet resolves the alias.
df1 = (
    spark.read.format("jdbc")
    .option("url", "URL")
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .option("dbtable", "tableName")
    .option("user", "user")
    .option("password", "password_alias")
    .load()
)
I know it's a little late to answer this question, but this is one way to pass the alias as the password.
You need to resolve the password_alias to the actual password through the Hadoop configuration (hadoopConfiguration), and then pass that password to Spark.
# Location of the JCEKS credential store and the alias under which the
# SQL Server password is stored in it.
jceks_path = "jceks://hdfs/sqlserver.password.jceks"
alias = "password_alias"

# Point the Hadoop configuration behind the Spark context at the credential
# store so that getPassword() can look the alias up in it.
conf = spark.sparkContext._jsc.hadoopConfiguration()
conf.set("hadoop.security.credential.provider.path", jceks_path)

# getPassword() returns a Java char[] (None on the Python side when the alias
# cannot be resolved), so fail with a clear message instead of an opaque
# AttributeError further down.
credential_raw = conf.getPassword(alias)
if credential_raw is None:
    raise ValueError(
        "No password found for alias '{0}' in '{1}'".format(alias, jceks_path)
    )

# Assemble the Python string in a single pass. The array is read by index,
# the same access pattern the original code used on the py4j wrapper.
password = "".join(str(credential_raw[i]) for i in range(len(credential_raw)))
# Load the SQL Server table over JDBC, supplying the password string held in
# the `password` variable rather than the alias name.
df1 = (
    spark.read.format("jdbc")
    .option("url", "URL")
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .option("dbtable", "tableName")
    .option("user", "user")
    .option("password", password)
    .load()
)