I want to change the data type of a column of a table in a DuckDB Database.
With
query2_c= ALTER TABLE populationShort ALTER Year SET DATA TYPE DATE;
(C Language Binding) I get Segmentation fault (core dumped) error.
The description of the segmentation fault error, obtained with the gdbdebugger, is :
duckdb::DeprecatedMaterializeResult(duckdb_result*)
How to correctly modify via query the data type of the column "Year"?
I upgraded duckdb version from 1.2.0 to the latest 1.3.2. And the problem persists. So it must be something in the code used
This is the complete code:
duckdb_database CSVDuckDB = NULL;
duckdb_connection CSVDuckDBConnection = NULL;
duckdb_result CSVDuckDBResult;
duckdb_state CSVDuckDBState;
const char* CSVDuckDBCompletePath = CSVDuckDBCompletePath_s.c_str();
if (duckdb_open(CSVDuckDBCompletePath, &CSVDuckDB) == DuckDBError)
{
fprintf(stderr, "Failed to open CSVDuckDB\n");
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
}
if (duckdb_connect(CSVDuckDB, &CSVDuckDBConnection) == DuckDBError)
{
fprintf(stderr, "Failed to open connection to CSVDuckDB\n");
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
}
std::string justname_s_pure = justname_s_splitted[0];
std::string gettablename = std::format("{}", justname_s_pure);
std::cout << "gettablename= " << gettablename << std::endl;
std::string query = "CREATE OR REPLACE TABLE ";
query.append(gettablename);
query.append(" AS SELECT * FROM read_csv('");
std::string getfilename = std::format("{}", fileName_s);
query.append(getfilename);
//query.append("', sample_size = -1);");
query.append("', strict_mode = false, ignore_errors = true);");
const char* query_c = query.c_str();
std::cout << "query_c= " << query_c << std::endl;
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query_c, &CSVDuckDBResult);
if (CSVDuckDBState == DuckDBError)
{
std::cout << "duckdb_query produced DuckDBError" << std::endl;
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
}
else
{
std::cout << "duckdb_query is ok" << std::endl;
duckdb_result_type CSVDuckDBReturnType = duckdb_result_return_type(CSVDuckDBResult);
std::cout << "CSVDuckDBReturnType= " << CSVDuckDBReturnType << std::endl;
}
query = "SELECT column_name, ordinal_position, data_type FROM information_schema.columns;";
query_c = query.c_str();
std::cout << "query_c= " << query_c << std::endl;
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query_c, &CSVDuckDBResult);
if (CSVDuckDBState == DuckDBError)
{
std::cout << "duckdb_query of information_schema.columns produced DuckDBError" << std::endl;
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
} idx_t col_count = duckdb_column_count(&CSVDuckDBResult);
idx_t row_count = duckdb_row_count(&CSVDuckDBResult);
for (size_t row_idx = 0; row_idx < row_count; row_idx++)
{
std::cout << "row:" << row_idx << std::endl;
char* val_idx_0 = duckdb_value_varchar(&CSVDuckDBResult, 0, row_idx);
std::string val_idx_0_s (val_idx_0);
printf("%s ", val_idx_0);
char* val_idx_2 = duckdb_value_varchar(&CSVDuckDBResult, 2, row_idx);
printf("%s ", val_idx_2);
std::string val_idx_2_s (val_idx_2);
if (
(Grasp::IsStringIntoVect(date_csv_fields, Grasp::StringToLower(val_idx_0_s)))
&&
(Grasp::IsStringIntoVect(duckdbNumericalDataTypes, val_idx_2_s))
)
{
std::cout << "IT HAS TO BE CHANGED TO DATE TYPE" << std::endl;
// https://duckdb.org/docs/stable/sql/statements/alter_table.html
std::string query2 = "ALTER TABLE ";
query2.append(gettablename);
query2.append(" ALTER '");
std::string getValIdx0s = std::format("{}", val_idx_0_s);
query2.append(getValIdx0s);
query2.append("' SET DATA TYPE USING MAKE_DATE('year',1,1);");
const char* query2_c = query2.c_str();
std::cout << "query2_c= " << query2_c << std::endl;;
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query2_c, &CSVDuckDBResult);
if (CSVDuckDBState == DuckDBError)
{
std::cout << "duckdb_query of altering the data type produced DuckDBError" << std::endl;
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
}
}
printf("\n");
}
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
From the complete output:
gettablename= populationShort
query_c= CREATE OR REPLACE TABLE populationShort AS SELECT * FROM
read_csv('/home/raphy/Downloads/CSVFiles/populationShort.csv',
strict_mode = false, ignore_errors = true);
duckdb_query is ok
CSVDuckDBReturnType= 2
query_c= SELECT column_name, ordinal_position, data_type FROM
information_schema.columns;
row:0
Country Name VARCHAR
row:1
Country Code VARCHAR
row:2
Year BIGINT IT HAS TO BE CHANGED TO DATE TYPE
query2_c= ALTER TABLE populationShort ALTER 'Year' SET DATA TYPE
DATE;
duckdb_query of altering the data type produced DuckDBError
row:3
Segmentation fault (core dumped)
(base) raphy@raohy:/var/crash$ gdb /home/raphy/MyPrj/builddir/
MyPrj ./core_MyPrj.27982
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00007652aed97360 in
duckdb::DeprecatedMaterializeResult(duckdb_result*) ()
from /home/raphy/MyPrj/./src/DuckDB/lib/libduckdb.so
it is clear that the code causing the segmentation fault is the following part:
// https://duckdb.org/docs/stable/sql/statements/alter_table.html
std::string query2 = "ALTER TABLE ";
query2.append(gettablename);
query2.append(" ALTER '");
std::string getValIdx0s = std::format("{}", val_idx_0_s);
query2.append(getValIdx0s);
query2.append("' SET DATA TYPE USING MAKE_DATE('year',1,1);"); // N.B.: or with the alternative presented in Update 01, see below in the question.
const char* query2_c = query2.c_str();
std::cout << "query2_c= " << query2_c << std::endl;;
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query2_c, &CSVDuckDBResult);
if (CSVDuckDBState == DuckDBError)
{
std::cout << "duckdb_query of altering the data type produced DuckDBError" << std::endl;
// Clean-up
duckdb_destroy_result(&CSVDuckDBResult);
duckdb_disconnect(&CSVDuckDBConnection);
duckdb_close(&CSVDuckDB);
}
This is the content of populationShort.csv file :
Country Name,Country Code,Year,Value
Aruba,ABW,1960,54922
Aruba,ABW,1961,55578
Aruba,ABW,1962,56320
And these are the date_csv_fields and duckdbNumericalDataTypes vectors:
std::vector<std::string> date_csv_fields = {
"month",
"year",
"day"
};
std::vector<std::string> duckdbNumericalDataTypes = {
"BIGINT",
"DECIMAL",
"FLOAT",
"DOUBLE",
"HUGEINT",
"SMALLINT",
"UBIGINT",
"UHUGEINT",
"UINTEGER",
"USMALLINT",
"UTINYINT"
};
I get
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x000076efd0d97360 in
duckdb::DeprecatedMaterializeResult(duckdb_result*) ()
also if the query executed is:
query2_c= ALTER TABLE populationShort ALTER 'Year' SET DATA TYPE DATE
I modified query2 as follows to avoid pointed out SQL syntax problem, and separate concerns (SQL code vs C SIGSEGV):
std::string query2 = "ALTER TABLE ";
query2.append(gettablename);
query2.append(" ALTER \"");
std::string getValIdx0s = std::format("{}", val_idx_0_s);
query2.append(getValIdx0s);
//query2.append("' SET DATA TYPE USING MAKE_DATE('year',1,1);");
query2.append("\" SET DATA TYPE DATE;");
const char* query2_c = query2.c_str();
std::cout << "query2_c= " << query2_c << std::endl;;
resulting in:
query2_c= ALTER TABLE populationShort ALTER "Year" SET DATA TYPE DATE;
But still get
Segmentation fault (core dumped)
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x000072894dd97360 in duckdb::DeprecatedMaterializeResult(duckdb_result*) ()
CSVDuckDBResult overwritingSimplifying your code around uses of CSVDuckDBResult, we have:
duckdb_result CSVDuckDBResult;
[…]
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query_c, &CSVDuckDBResult);
[…]
for (size_t row_idx = 0; row_idx < row_count; row_idx++)
{
char* val_idx_0 = duckdb_value_varchar(&CSVDuckDBResult, 0, row_idx);
[…]
if ([…])
{
[…]
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query2_c, &CSVDuckDBResult);
[…]
}
}
duckdb_destroy_result(&CSVDuckDBResult);
If you look closely, results for both query_c and query2_c are written to the same CSVDuckDBResult variable, which is still in use in the loop while getting reused in the if([…]).
Thus unrolling your loop over your fourth columns makes:
duckdb_query(CSVDuckDBConnection, query_c, &CSVDuckDBResult)
row_idx = 0duckdb_value_varchar(&CSVDuckDBResult, 0, 0)if does not match, so do not enter its bodyrow_idx = 1duckdb_value_varchar(&CSVDuckDBResult, 0, 1)if does not match, so do not enter its bodyrow_idx = 2duckdb_value_varchar(&CSVDuckDBResult, 0, 2)if matches:
duckdb_query(CSVDuckDBConnection, query2_c, &CSVDuckDBResult) overwrites CSVDuckDBResultrow_idx = 3duckdb_value_varchar(&CSVDuckDBResult, 0, 3) tries to access column 3 of the overwritten CSVDuckDBResult and crashesduckdb_query()ing on a not yet duckdb_destroy_result()ed result)So, as you are using two results concurrently, you just have to declare another result dedicated to the ALTER:
duckdb_result CSVDuckDBResult;
[…]
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query_c, &CSVDuckDBResult);
[…]
for (size_t row_idx = 0; row_idx < row_count; row_idx++)
{
char* val_idx_0 = duckdb_value_varchar(&CSVDuckDBResult, 0, row_idx);
[…]
if ([…])
{
duckdb_result CSVDuckDBResult2; // ← Declare here
[…]
CSVDuckDBState = duckdb_query(CSVDuckDBConnection, query2_c, &CSVDuckDBResult2); // ← Change to 2
[…]
duckdb_destroy_result(&CSVDuckDBResult2); // ← Get out of the "if (CSVDuckDBState == DuckDBError)", and change to 2
}
}
duckdb_destroy_result(&CSVDuckDBResult);