awksedtext-processing

Parse text file, change some strings to camel case, add other strings - follow up question


Note that this is the follow up question of Parse text file, change some strings to camel case, add other strings . The parsing rules are similar but different:

  1. Replace the string "public static final String" with the string "export const" if that string occurs only once.
  2. Replace the string "public static final String" with the string "export enum" if similar strings (which has understores) occur more than once. Change all similar strings to the camel case string Str1. Append Str1 to the string "export enum".
  1. Replace the string "public static final int" with the string "export const" if that string occurs only once.
  2. Replace the string "public static final int" with the string "export enum" if similar strings (which has understores) occur more than once.

These are sample input and output.

input

    //Comment
    public static final String CUSTOMER_TYPE_CD_T_01 = "01";
    public static final String CUSTOMER_TYPE_CD_TB_02 = "02";
    public static final String CUSTOMER_TYPE_CD_TCC_03 = "03";
    public static final String CUSTOMER_TYPE_CD_TDDD_04 = "04";

    public static final String TEST_ING       = "TEST";

    //----------------------------------------
    //Comments
    //----------------------------------------
    public static final int    BEGIN_A_BB_C_D_EE_FFF_01      = 0;
    public static final int    END_A_BB_C_D_EE_FFF_01    = 2;

output

    //Comment
    export enum CustomerTypeCd {
        T_01 = "01",
        TB_02 = "02",
        TCC_03 = "03",
        TDDD_04 = "04",
    }

    export const TEST_ING = "TEST";

    //----------------------------------------
    //Comments
    //----------------------------------------
    export enum ABbCDEeFff01 {
        BEGIN = 0,
        END = 2,
    }

I modified the answer of: Parse text file, change some strings to camel case, add other strings as follows. It handles rules 1 and 2, fails to handle rules 3 and 4:

    function cap(s) { return substr(s, 1, 1) tolower(substr(s, 2)) } # capitalization

    function cc(s, a,    b, n, i) { # return a[1] = enum name, a[2] = key
      n = split(s, b, /_/); a[1] = ""
      for(i = 1; i < n; i++) a[1] = a[1] cap(b[i]) # camel-case
      a[2] = cap(b[n]) # key
    }

    function cc2(s, a,    b, n, i) { # return a[1] = enum name, a[2] = key
      n = split(s, b, /_/); a[1] = ""
      for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
      a[2] = b[n - 1] "_" cap(b[n]) # key
    }

    function cc3(s, a,    b, n, i) { # return a[1] = enum name, a[2] = key
      n = split(s, b, /_/);
      enumkey = b[1]
      a[1] = ""
      for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
      a[2] = b[n - 1] "_" cap(b[n]) # key
      a[1] = enumkey
    }

    /public static final String/ {
      # compute enum name (e), key (k), value without final ";" (v)
      cc2($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
      # if new enum name
      if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
      # add key and value
      nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
      # key prefix if only-digits key
      if(k ~ /^[0-9]+$/) pfx[ne] = e
    }

    /public static final int/ {
      # compute enum name (e), key (k), value without final ";" (v)
      cc3($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
        
      # if new enum name
      if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
      # add key and value
      nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
      # key prefix if only-digits key
      if(k ~ /^[0-9]+$/) pfx[ne] = e
    }

    END {
      for(i = 1; i <= ne; i++) { # for all enum/const
        # if only one key-value pair => const
        if(nk[i] == 1) print sep "export const " cname[i] " = " val[i,1] ";"
        else { # enum
          print sep "export enum " ename[i] " {"
          for(j = 1; j <= nk[i]; j++) print "\t" pfx[i] key[i,j] " = " val[i,j] ","
          print "}"
        }
        sep = "\n"
      }
    }

awk -V GNU Awk 5.0.1, API: 2.0 (GNU MPFR 4.0.2, GNU MP 6.2.0)


---------
EDIT: here is the above code formatted legibly by `gawk -o-`:

    /public static final String/ {
        # compute enum name (e), key (k), value without final ";" (v)
        cc2($5, ek)
        e = ek[1]
        k = ek[2]
        v = $NF
        sub(/;[[:space:]]*$/, "", v)
        # if new enum name
        if (! (e in seen)) {
            seen[e] = 1
            ne += 1
            ename[ne] = e
            cname[ne] = $5
        }
        # add key and value
        nk[ne] += 1
        key[ne, nk[ne]] = k
        val[ne, nk[ne]] = v
        # key prefix if only-digits key
        if (k ~ /^[0-9]+$/) {
            pfx[ne] = e
        }
    }
    
    /public static final int/ {
        # compute enum name (e), key (k), value without final ";" (v)
        cc3($5, ek)
        e = ek[1]
        k = ek[2]
        v = $NF
        sub(/;[[:space:]]*$/, "", v)
        # if new enum name
        if (! (e in seen)) {
            seen[e] = 1
            ne += 1
            ename[ne] = e
            cname[ne] = $5
        }
        # add key and value
        nk[ne] += 1
        key[ne, nk[ne]] = k
        val[ne, nk[ne]] = v
        # key prefix if only-digits key
        if (k ~ /^[0-9]+$/) {
            pfx[ne] = e
        }
    }
    
    END {
        for (i = 1; i <= ne; i++) {    # for all enum/const
            # if only one key-value pair => const
            if (nk[i] == 1) {
                print sep "export const " cname[i] " = " val[i, 1] ";"
            } else {    # enum
                print sep "export enum " ename[i] " {"
                for (j = 1; j <= nk[i]; j++) {
                    print "\t" pfx[i] key[i, j] " = " val[i, j] ","
                }
                print "}"
            }
            sep = "\n"
        }
    }
    
    
    function cap(s)
    {
        return (substr(s, 1, 1) tolower(substr(s, 2)))
    }
    
    # capitalization
    function cc(s, a, b, n, i)
    {
        # return a[1] = enum name, a[2] = key
        n = split(s, b, /_/)
        a[1] = ""
        for (i = 1; i < n; i++) {
            a[1] = a[1] cap(b[i])    # camel-case
        }
        a[2] = cap(b[n])    # key
    }
    
    function cc2(s, a, b, n, i)
    {
        # return a[1] = enum name, a[2] = key
        n = split(s, b, /_/)
        a[1] = ""
        for (i = 1; i < n - 1; i++) {
            a[1] = a[1] cap(b[i])    # camel-case
        }
        a[2] = b[n - 1] "_" cap(b[n])    # key
    }
    
    function cc3(s, a, b, n, i)
    {
        # return a[1] = enum name, a[2] = key
        n = split(s, b, /_/)
        enumkey = b[1]
        a[1] = ""
        for (i = 1; i < n - 1; i++) {
            a[1] = a[1] cap(b[i])    # camel-case
        }
        a[2] = b[n - 1] "_" cap(b[n])    # key
        a[1] = enumkey
    }


Solution

  • I'd blame tiredness for those problems, because while the carefully crafted cc2 works as intended, cc3() seems to be just three typos away from working:

    So by reindexing the loop, removing the unwanted line, and putting the key to a[2], you'll have your awk rework.

    Diff:

    -      for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
    -      a[2] = b[n - 1] "_" cap(b[n]) # key
    -      a[1] = enumkey
    +      for(i = 2; i <= n; i++) a[1] = a[1] cap(b[i]) # camel-case
    +      a[2] = enumkey