Consider two files
file 1:
name|gender|phone|email|city|country
abc|F|11111|ldiskmsjdh|pune|india
xyz|M|22222|wassrrrrtf|delhi|india
file2:
sno|name|email|country
1|abc|zzzzzzzz|USA
2|mnq|tttttttt|UK
i need the below output needed using unix:
name|gender|phone|email|city|country
abc|F|11111|zzzzzzzz|pune|USA
xyz|M|22222|wassrrrrtf|delhi|india
NOTE: Match based on primary key i.e “name”
I was able to replace the entire line. But I want to replace only columns for rows when there is match and update only those columns that are present in second file. original format should not change.
I am looking for a general code that will work for N number of columns. Key position shall remain the same.
I wouldn't normally post an answer when the OP hasn't provided any attempt of their own but since there are multiple answers posted already...
This may be what you want, using any awk:
$ cat tst.awk
BEGIN { FS=OFS="|" }
NR == FNR {
if ( FNR == 1 ) {
for ( i=1; i<=NF; i++ ) {
tags2aFldNrs[$i] = i
}
}
else {
name = $(tags2aFldNrs["name"])
names2aVals[name] = $0
}
next
}
{
if ( FNR == 1 ) {
for ( i=1; i<=NF; i++ ) {
tags2bFldNrs[$i] = i
if ( $i in tags2aFldNrs ) {
bFldNrs2aFldNrs[i] = tags2aFldNrs[$i]
}
}
}
else {
name = $(tags2bFldNrs["name"])
if ( name in names2aVals ) {
split(names2aVals[name],aVals)
for ( bFldNr in bFldNrs2aFldNrs ) {
aFldNr = bFldNrs2aFldNrs[bFldNr]
$bFldNr = aVals[aFldNr]
}
}
}
print
}
$ awk -f tst.awk file2 file1
name|gender|phone|email|city|country
abc|F|11111|zzzzzzzz|pune|USA
xyz|M|22222|wassrrrrtf|delhi|india
That'll work efficiently no matter how many fields you have on each line as it only loops through the subset of fields that have column header tags that are common between the 2 files and only when the name
matches. It'll also work no matter where the name
column exists in each file.