jan 18 "value1 is null"
feb 4 "value1 is null"
In the above dataset there are consecutive delimiters between the 1st and 2nd columns in the second row. How can consecutive delimiters be handled as a single delimiter?
-- External table over whitespace-delimited text files, parsed by RegexSerDe.
-- The input.regex below has three capture groups, mapped in order to c1, c2 and c3.
-- Each group matches either a double-quoted value or a lazily matched run of characters.
-- The groups are separated by \s+ (one or more whitespace characters), so a run of
-- consecutive delimiters is consumed as a single separator.
CREATE EXTERNAL TABLE mydata (
    c1 STRING,
    c2 STRING,
    c3 STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '(".*?"|.*?)\\s+(".*?"|.*?)\\s+(".*?"|.*?)'
)
LOCATION '/user/hive/warehouse/mydata';

-- Read back every row to confirm the three columns were split as intended.
SELECT * FROM mydata;
+-----------+-----------+------------------+
| mydata.c1 | mydata.c2 | mydata.c3        |
+-----------+-----------+------------------+
| jan       | 18        | "value1 is null" |
| feb       | 4         | "value1 is null" |
+-----------+-----------+------------------+