plsqlutf-8character-encodingansiutl-file

oracle utl_file encoding from utf8


I like to export large amount of text data from db to file. The characterset in db is UTF8. The excepted result in the file is ISO8859P2 or MSWIN1250.

My db settings:

SELECT * FROM v$nls_parameters;
1   NLS_LANGUAGE    HUNGARIAN   0
2   NLS_TERRITORY   HUNGARY 0
9   NLS_CHARACTERSET    UTF8    0
10  NLS_SORT    HUNGARIAN   0
16  NLS_NCHAR_CHARACTERSET  UTF8    0
17  NLS_COMP    BINARY  0
18  NLS_LENGTH_SEMANTICS    CHAR    0
19  NLS_NCHAR_CONV_EXCP FALSE   0

select * from nls_database_parameters;
1   NLS_RDBMS_VERSION   12.1.0.2.0
2   NLS_NCHAR_CONV_EXCP FALSE
15  NLS_NCHAR_CHARACTERSET  UTF8
16  NLS_CHARACTERSET    UTF8
19  NLS_TERRITORY   AMERICA
20  NLS_LANGUAGE    AMERICAN

select * from nls_session_parameters;
1   NLS_LANGUAGE    HUNGARIAN
2   NLS_TERRITORY   HUNGARY
9   NLS_SORT    HUNGARIAN
15  NLS_COMP    BINARY
16  NLS_LENGTH_SEMANTICS    CHAR
17  NLS_NCHAR_CONV_EXCP FALSE

The file created at server directory (linux). I haven't more information for linux characterset settings.

The PLSQL code:

DECLARE
   v_fh     UTL_FILE.FILE_TYPE;
   v_eol    VARCHAR2(2);
   v_eollen PLS_INTEGER;
   CURSOR cur_sql IS

SELECT T3.ID_RESULT
      ,T3.column1 
FROM   table1
WHERE  id_result = 999999
  ;
   "ID_RESULT" DBMS_SQL.NUMBER_TABLE;
   "column1" DBMS_SQL.VARCHAR2A;

BEGIN
   EXECUTE IMMEDIATE 'ALTER SESSION SET NLS_DATE_FORMAT = ''YYYY.MM.DD HH24:MI:SS''';
   v_eol := CHR(13)||CHR(10);
   v_eollen := LENGTH(v_eol);
   v_fh := UTL_FILE.FOPEN('REP_DIR','result_test.csv','W', 32000);

   OPEN cur_sql;
   LOOP
      FETCH cur_sql
      BULK COLLECT INTO "ID_RESULT",
                        "column1",
                        LIMIT 1000;
      IF "ID_RESULT".COUNT > 0 THEN
         FOR i IN "ID_RESULT".FIRST .. "ID_RESULT".LAST LOOP

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2','UTF8'));
           UTL_FILE.PUT_nchar(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250','UTF8'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, "column1"(i));
           UTL_FILE.PUT(v_fh, v_eol);             

           UTL_FILE.PUT(v_fh, utl_raw.cast_to_varchar2(utl_raw.convert(utl_raw.cast_to_raw("column1"(i) ),'HUNGARIAN_HUNGARY.EE8MSWIN1250', 'ENGLISH_UNITED KINGDOM.UTF8')));
           UTL_FILE.PUT(v_fh, v_eol);               

           UTL_FILE.fflush(v_fh);
         END LOOP;
      END IF;
      EXIT WHEN cur_sql%NOTFOUND;
   END LOOP;
   CLOSE cur_sql;
   UTL_FILE.FCLOSE(v_fh);
EXCEPTION
   WHEN 
        .........
      RAISE;
END;

The original value in db: value in hexa

The result in Notepad++ (encode in UTF8):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosné

Csere Lajosn

The result in Notepad++ (encode in ANSI, char set: windows-1250):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosné

Csere Lajosn

The result in Notepad++ (encode in ANSI, char set: iso-8859-2):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere LajosnĂŠ

Csere Lajosn

When i use CONVERT i lost "é" character. How can i do convert the string from UTF8 to ANSI?

Thanks, Zoltan


Solution

  • This is from mine toolbox package - try it/tweak. Works with different code pages in-out.

    function dump_dsv_fast(p_query in varchar2
                      ,p_filename in varchar2
                      ,p_dir in varchar2 default c_DEFAULT_DIRECTORY
                      ,p_separator in varchar2 default ';'
                      ,p_text_qualifier in varchar2 default ''
                      ,p_header in boolean default true
                      ,p_eol_format in varchar2 default chr(13) || chr(10)
                      ,p_characterset in varchar2 default 'EE8MSWIN1250'
                      ,p_write_mode in varchar2 default 'WB') return number
    
    
      /****************************************************
      Formats:
      AL32UTF8        --> full utf
      WE8MSWIN1252    --> no pl chars
      WE8MSWIN1250    --> pl ansi
      Limitations:   this one is faster filing buffer once
    
      * 2014/05/13 --> change to raw (end line character)
      * 2016/02/12 --> text qualifier
                   --> header on/off
                   --> line length adjust
                   --> text q
      * 2016/03/09 --> codepage
      *****************************************************/
     is
      l_output      utl_file.file_type;
      l_theCursor   integer default dbms_sql.open_cursor;
      l_columnValue varchar2(32760);
      l_status      integer;
      l_separator   varchar2(10);
      l_colCnt      number default 0;
      l_descTbl     dbms_sql.desc_tab;
      l_cnt         number default 0;
      l_buffer      raw(32767) := null;
    begin
      l_output := utl_file.fopen(p_dir, p_filename, p_write_mode, 32760);
      dbms_sql.parse(l_theCursor, p_query, dbms_sql.native);
      if p_header
      then
        dbms_sql.describe_columns(l_theCursor, l_colCnt, l_descTbl);
        l_separator := '';
        for i in 1 .. l_colCnt
        loop
          if p_characterset = 'EE8MSWIN1250'
          then
            utl_file.put_raw(l_output,
                             utl_raw.cast_to_raw(l_separator ||
                                                  p_text_qualifier || l_descTbl(i)
                                                  .col_name || p_text_qualifier));
          else
            utl_file.put_raw(l_output,
                             utl_i18n.string_to_raw(l_separator ||
                                                     p_text_qualifier || l_descTbl(i)
                                                     .col_name ||
                                                     p_text_qualifier,
                                                     p_characterset));
          end if;
          dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
          l_separator := p_separator;
        end loop;
        utl_file.put_raw(l_output, utl_raw.cast_to_raw(p_eol_format));
      end if;
      for i in 1 .. 255
      loop
        begin
          dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
          l_colCnt := i;
        exception
          when others then
            if (sqlcode = -1007)
            then
              exit;
            else
              raise;
            end if;
        end;
      end loop;
    
      dbms_sql.define_column(l_theCursor, 1, l_columnValue, 32760);
    
      l_status := dbms_sql.execute(l_theCursor);
    
      loop
        exit when(dbms_sql.fetch_rows(l_theCursor) <= 0);
        l_separator := '';
        for i in 1 .. l_colCnt
        loop
          dbms_sql.column_value(l_theCursor, i, l_columnValue);
          if p_characterset = 'EE8MSWIN1250'
          then
            l_buffer := l_buffer ||
                        utl_raw.cast_to_raw(l_separator || p_text_qualifier ||
                                            l_columnValue || p_text_qualifier);
          else
            l_buffer := l_buffer ||
                        utl_i18n.string_to_raw(l_separator || p_text_qualifier ||
                                               l_columnValue ||
                                               p_text_qualifier, p_characterset);
          end if;
          l_separator := p_separator;
        end loop;
        utl_file.put_raw(l_output,
                         l_buffer || utl_raw.cast_to_raw(p_eol_format));
        l_buffer := '';
        l_cnt    := l_cnt + 1;
      end loop;
      dbms_sql.close_cursor(l_theCursor);
    
      utl_file.fclose(l_output);
      return l_cnt;
    end dump_dsv_fast;