regexbashawkgrepcopybook

How to repeat a block of lines using awk?


I'm trying to repeat the block of lines avobe the OCCURS word the number of times inticated in the line. The block of lines to repeat have a smaller number at the start of the line.

I mean, with this input:

01  PATIENT-TREATMENTS.
   05  PATIENT-NAME                PIC X(30).
   05  PATIENT-SS-NUMBER           PIC 9(9).
   05  NUMBER-OF-TREATMENTS        PIC 99 COMP-3.
   05  TREATMENT-HISTORY OCCURS 2.
       10  TREATMENT-DATE OCCURS 3.
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
       10  TREATING-PHYSICIAN       PIC X(30).
       10  TREATMENT-CODE           PIC 99.
   05 HELLO PIC X(9).
   05 STACK OCCURS 2.
       10 OVERFLOW PIC X(99).

This would be the output:

01  PATIENT-TREATMENTS.
   05  PATIENT-NAME                PIC X(30).
   05  PATIENT-SS-NUMBER           PIC 9(9).
   05  NUMBER-OF-TREATMENTS        PIC 99 COMP-3.
   05  TREATMENT-HISTORY OCCURS 2.
       10  TREATMENT-DATE OCCURS 3.
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
       10  TREATING-PHYSICIAN       PIC X(30).
       10  TREATMENT-CODE           PIC 99.
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
           15  TREATMENT-DAY        PIC 99.
           15  TREATMENT-MONTH      PIC 99.
           15  TREATMENT-YEAR       PIC 9(4).
       10  TREATING-PHYSICIAN       PIC X(30).
       10  TREATMENT-CODE           PIC 99.
   05 HELLO PIC X(9).
   05 STACK OCCURS 2.
       10 OVERFLOW PIC X(99).
       10 OVERFLOW PIC X(99).

I tried it by this way:

tac input.txt |
awk '
BEGIN {
 lbuff="";
 n=0;
}{

  if($0 ~ /^\s*$/) {next;}

  if ($3 == "OCCURS") {
    lev_oc=$1
    len_oc=$4
    lstart=0

    for (x=1; x<n; x++) {
      split(saved[x],saved_level," ")
      if (saved_level[1] <= lev_oc) {
        print saved[x]
        lstart=x+1
      }
    }

    for (i=1; i<=len_oc; i++) {
      for (x=lstart; x<n; x++) {
        print saved[x]
      }
    }

    print $0

  }else if ($0) {
    saved[n]=$0
    n++
  }

}' | tac

But I don't get the result what I'm trying to obtain. Is awk the best way to do it? Do you have any alternative?


Solution

  • I used perl for this because it's easy to make arbitrarily complex data structures:

    #!/usr/bin/perl
    use strict;
    use warnings;
    
    # read the file into an array of lines.
    open my $f, '<', shift;
    my @lines = <$f>;
    close $f;
    
    my @occurring;
    my @occurs;
    
    # iterate over the lines of the file
    for (my $i = 0; $i < @lines; $i++) {
        # extract the "level", the first word of the line
        my $level = (split ' ', $lines[$i])[0];
    
        # if this line contains the OCCURS string,
        # push some info onto a stack.
        # This marks the start of something to be repeated
        if ($lines[$i] =~ /OCCURS (\d+)/) {
            push @occurring, [$1-1, $level, $i+1];
            next;
        }
    
        # if this line is at the same level as the level of the start of the
        # last seen item on the stack, mark the last line of the repeated text
        if (@occurring and $level eq $occurring[-1][1]) {
            push @occurs, [@{pop @occurring}, $i-1];
        }
    }
    
    # If there's anything open on the stack, it ends at the last line
    while (@occurring) {
        push @occurs, [@{pop @occurring}, $#lines];
    }
    
    # handle all the lines to be repeated by appending them to the last
    # line of the repetition
    for (@occurs) {
        my $repeated = "";
        my ($count, undef, $start, $stop) = @$_;
        $repeated .= join "", @lines[$start..$stop] for (1..$count);
        $lines[$stop] .= $repeated;
    }
    
    print @lines;
    

    For your reading pleasure, here's an awk translation.

    BEGIN {
        s = 0
        f = 0
    }
    
    function stack2frame(lineno) {
        f++
        frame[f,"reps"] = stack[s,"reps"]
        frame[f,"start"] = stack[s,"start"]
        frame[f,"stop"] = lineno
        s--
    }
    
    { 
        lines[NR] = $0
        level = $1
    }
    
    # if this line contains the OCCURS string, push some info onto a stack.
    # This marks the start of something to be repeated
    $(NF-1) == "OCCURS" {
        s++
        stack[s,"reps"] = $NF-1
        stack[s,"level"] = level
        stack[s,"start"] = NR+1
        next
    }
    
    # if this line is at the same level as the level of the start of the
    # last seen item on the stack, mark the last line of the repeated text
    level == stack[s,"level"] {
        stack2frame(NR-1)
    }
    
    END {
        # If there's anything open on the stack, it ends at the last line
        while (s) {
            stack2frame(NR)
        }
    
        # handle all the lines to be repeated by appending them to the last
        # line of the repetition
        for (i=1; i<=f; i++) {
            repeated = ""
            for (j=1; j <= frame[i,"reps"]; j++) {
                for (k = frame[i,"start"]; k <= frame[i,"stop"]; k++) {
                    repeated = repeated ORS lines[k]
                }
            }
            lines[frame[i,"stop"]] = lines[frame[i,"stop"]] repeated
        }
    
        for (i=1; i <= NR; i++) 
            print lines[i]
    }