bash unix bioinformatics genome google-genomics

Using awk to print a header name and a substring


I am trying to use this code to print a header containing a gene name and then pull out a substring based on its location, but it doesn't work:

# Non-working attempt: for every row of input_file (name, start, end) it tries
# to append a ">name" header and the matching slice of the sequence in `fasta`
# to genes_fasta.
# The redirection below creates (or truncates) output_file; nothing in this
# snippet writes to it afterwards.
>output_file
cat input_file | while read row; do
        # The current row is written to a scratch file called `temp` ...
        echo $row > temp
        # ... but the three awk calls read a file called `tmp`, not `temp`,
        # so they do not see the row that was just written.
        geneName=`awk '{print $1}' tmp`
        startPos=`awk '{print $2}' tmp`
        endPOs=`awk '{print $3}' tmp`
                # `temp` here is a literal word, so the loop body runs exactly
                # once per row; the loop variable `i` is never used.
                for i in temp; do
                echo ">${geneName}" >> genes_fasta ;
                # This echoes the text of an awk command into genes_fasta
                # instead of executing awk. Because the string is in double
                # quotes, the shell expands $0 (the running script's name),
                # ${startPos} and ${endPOs} before echo sees them.
                # Note also that awk's substr() takes (string, start, length),
                # not (string, start, end).
                echo "awk '{val=substr($0,${startPos},${endPOs});print val}' fasta" >> genes_fasta
        done
done

input_file

nad5_exon1 250405 250551
nad5_exon2 251490 251884
nad5_exon3 195620 195641
nad5_exon4 154254 155469
nad5_exon5 156319 156548

fasta

atgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgcatgc............

And this is my (wrong) output file:

>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta
>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta
>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta
>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta
>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta
>
awk '{val=substr(pull_genes.sh,,);print val}' unwraped_carm_mt.fasta

The output should look like this:

>name1
atgcatgcatgcatgcatgcat
>name2
tgcatgcatgcatgcat
>name3
gcatgcatgcatgcatgcat
>namen....

Solution

  • Made it work! This is the script for pulling substrings from a FASTA file:

    #######################################
    # Append one FASTA record per gene to an output file.
    #
    # Each row of the coordinates file is "<gene_name> <start> <end>"
    # (whitespace separated). For every row a ">gene_name" header is written,
    # followed by the start..end slice of every sequence line of the FASTA
    # file (the FASTA is expected to be "unwrapped", i.e. one line per
    # sequence).
    #
    # Arguments:
    #   $1 - coordinates file
    #   $2 - unwrapped FASTA file to slice
    #   $3 - output file (appended to, never truncated)
    # Returns:
    #   0 on success, 1 if an input file cannot be read.
    #######################################
    pull_genes() {
        local coords_file=$1 fasta_file=$2 out_file=$3
        local gene_name start_pos end_pos _extra

        if [[ ! -r "$coords_file" || ! -r "$fasta_file" ]]; then
            printf 'pull_genes: cannot read "%s" or "%s"\n' \
                "$coords_file" "$fasta_file" >&2
            return 1
        fi

        # `read` splits the row into fields directly, so no scratch file or
        # per-field awk call is needed. The `|| [[ -n ... ]]` part keeps a
        # final row that lacks a trailing newline.
        while read -r gene_name start_pos end_pos _extra \
                || [[ -n "$gene_name" ]]; do
            # Skip blank rows instead of emitting an empty ">" record.
            [[ -n "$gene_name" ]] || continue

            printf '>%s\n' "$gene_name"
            # substr() takes a LENGTH, not an end position. The interval is
            # treated as inclusive on both ends, hence the "+ 1"; without it
            # the last base of every gene is lost.
            # NOTE(review): assumes 1-based inclusive coordinates (GenBank/GFF
            # style) - drop the "+ 1" if `end` is exclusive in your data.
            # FASTA header lines (starting with ">") are not sequence data and
            # are skipped.
            awk -v start="$start_pos" -v end="$end_pos" \
                '!/^>/ { print substr($0, start, end - start + 1) }' \
                "$fasta_file"
        done < "$coords_file" >> "$out_file"
    }

    # Same files as before: coordinates from genes_and_bounderies1, sequence
    # from unwraped_${fasta} (`fasta` must be set by the caller), records
    # appended to genes_fasta.
    pull_genes genes_and_bounderies1 "unwraped_${fasta}" genes_fasta