gitlocalizationmergeconflictgettext

Where's the 3-way Git merge driver for .PO (gettext) files?


I already have the following

[attr]POFILE merge=merge-po-files

locale/*.po POFILE

in the .gitattributes file and I'd like to get merging of branches to work correctly when the same localization file (e.g. locale/en.po) has been modified in parallel branches. I'm currently using the following merge driver:

#!/bin/bash
# git merge driver for .PO files (gettext localizations)
# Install:
# git config merge.merge-po-files.driver "./bin/merge-po-files %A %O %B"
#
# Arguments (in the order given in the driver configuration above):
#   $1 - current version (%A); the merge result is written back to this file
#   $2 - common ancestor version (%O)
#   $3 - other branch version (%B)
# Exit status: 0 when the merge is clean, 1 on conflict or on any failure.

LOCAL="${1}._LOCAL_"
BASE="${2}._BASE_"
REMOTE="${3}._REMOTE_"

# Remove the temporary copies on every exit path, including early failures
cleanup()
{
    rm -f -- "$LOCAL" "$BASE" "$REMOTE"
}
trap cleanup EXIT

# copy to bit more meaningful filenames to get better conflict results
cp -- "${1}" "$LOCAL" || exit 1
cp -- "${2}" "$BASE" || exit 1
cp -- "${3}" "$REMOTE" || exit 1

# merge files and overwrite local file with the result
msgcat "$LOCAL" "$BASE" "$REMOTE" -o "${1}" || exit 1

# check if merge has conflicts ("grep -F" replaces the obsolescent "fgrep")
if grep -qF -- '#-#-#-#-#' "${1}"
then
    exit 1
fi

# if we get here, merge is successful
exit 0

However, msgcat is too dumb and this is not a true three-way merge. For example, if I have

  1. BASE version

    msgid "foo"
    msgstr "foo"
    
  2. LOCAL version

    msgid "foo"
    msgstr "bar"
    
  3. REMOTE version

    msgid "foo"
    msgstr "foo"
    

I'll end up with a conflict. However, a true three-way merge driver would output the correct merge:

msgid "foo"
msgstr "bar"

Note that I cannot simply add --use-first to msgcat because the REMOTE could contain the updated translation. In addition, if BASE, LOCAL and REMOTE are all unique, I still want a conflict, because that would really be a conflict.

What do I need to change to make this work? Bonus points for a less insane conflict marker than '#-#-#-#-#', if possible.


Solution

  • Here's yet another answer, from the year 2021. Nowadays I'm using the following merge driver, and it seems to work correctly for all the cases I've tested. I have this stored as ./bin/merge-po-files in our repository.

    #!/bin/bash
    #
    # Three-way merge driver for PO files, runs on multiple CPUs where possible
    #
    # Copyright 2015-2016 Marco Ciampa
    # Copyright 2021 Mikko Rantalainen <mikko.rantalainen@iki.fi>
    # License: MIT (https://opensource.org/licenses/MIT)
    #
    # Original source:
    # https://stackoverflow.com/a/29535676/334451
    # https://github.com/mezis/git-whistles/blob/master/libexec/git-merge-po.sh
    #
    # Install with
    # git config merge.merge-po-files.driver "./bin/merge-po-files %A %O %B %P"
    #
    # Note that you also need file `.gitattributes` with following lines:
    #
    # [attr]POFILE merge=merge-po-files
    # locale/*.po POFILE
    #
    ##########################################################################
    # CONFIG:
    
    # Formatting flags used when writing the final merged .po file.
    # Adjust these to match the .po formatting conventions of the project.
    # NOTE: $MSGCAT_FINAL_FLAGS will be passed to msgcat without quotation
    MSGCAT_FINAL_FLAGS='--no-wrap --sort-output'
    
    # Verbosity level (an already set, non-empty VERBOSITY wins over the default):
    # 0: Silent except for real errors
    # 1: Show simple header for each file processed
    # 2: Also show all conflicts in merge result (both new and existing)
    # 3: Also show all status messages with timestamps
    : "${VERBOSITY:=2}"
    
    ##########################################################################
    # Implementation:
    
    # Use logical names for arguments:
    LOCAL="$1"
    BASE="$2"
    OTHER="$3"
    FILENAME="$4"
    OUTPUT="$LOCAL"
    
    # The temporary directory for all files we need - note that most files are
    # created without extensions to emit nicer conflict messages where gettext
    # likes to embed the basename of the file in the conflict message so we
    # use names like "local" and "other" instead of e.g. "local.G2wZ.po".
    # The directory is created under $TMPDIR when that is set, /tmp otherwise.
    TEMP="$(mktemp -d "${TMPDIR:-/tmp}/merge-po.XXXXXX")"
    
    # Remove the temporary directory (if it was created and still exists)
    cleanup_temp()
    {
        if [[ -n "$TEMP" && -d "$TEMP" ]] ; then
            # Best effort only: a failed cleanup must never change the exit
            # status that is reported back to git.
            rm -rf -- "$TEMP" || true
        fi
    }
    # Clean up on every exit path so that failed merges do not leak directories
    trap cleanup_temp EXIT
    
    
    # abort on any error and report the details if possible
    set -E
    set -e
    # Report a failed command on stderr and abort the merge driver.
    # Arguments:
    #   $1 - line number near which the failure happened
    #   $2 - exit status of the failed command
    #   $3 - optional free-form message appended to the report
    # Always exits with status 255.
    on_error()
    {
        local parent_lineno="$1"
        local code="$2"
        local message="$3"
        # The script name is passed as data (%s) instead of being embedded in
        # the format string, so a "%" or "\" in $0 cannot corrupt the report.
        if [[ -n "$message" ]] ; then
            printf '### %s: error near line %d: status %d: %s\n' "$0" "${parent_lineno}" "${code}" "${message}" 1>&2
        else
            printf '### %s: error near line %d: status %d\n' "$0" "${parent_lineno}" "${code}" 1>&2
        fi
        exit 255
    }
    # "set -E" above makes functions, subshells and command substitutions
    # inherit this trap.
    trap 'on_error ${LINENO} $?' ERR
    
    
    # Maybe print message(s) to stdout with timestamps
    function status()
    {
        # Progress messages are only wanted at the most verbose level (3+)
        test "$VERBOSITY" -ge 3 || return 0
        # Prefix the message with the current date and time
        printf "%s %s\n" "$(date '+%Y-%m-%d %H:%M:%S.%3N')" "$@"
    }
    
    # Quietly take translations from $1 and apply those according to template $2
    # (and do not use fuzzy-matching, always generate output)
    # also supports all flags to msgmerge
    function apply_po_template()
    {
        # Fixed msgmerge options; everything the caller passes (including any
        # extra msgmerge flags) is appended after them unchanged.
        local -a merge_flags=(--force-po --quiet --no-fuzzy-matching)
        msgmerge "${merge_flags[@]}" "$@"
    }
    
    # Take stdin, remove the "graveyard strings" and emit the result to stdout
    function strip_graveyard()
    {
        # Filter stdin to stdout, asking msgattrib to leave out obsolete entries
        local -a attrib_flags=(--no-obsolete)
        msgattrib "${attrib_flags[@]}"
    }
    
    # Take stdin, keep only conflict lines and emit the result to stdout
    function only_conflicts()
    {
        # Keep the messages whose msgstr contains the gettext conflict marker
        local marker='#-#-#-#-#'
        msggrep --msgstr -F -e "$marker" -
        # alternative slightly worse implementation: msgattrib --only-fuzzy
    }
    
    # Take stdin, discard conflict lines and emit the result to stdout
    function without_conflicts()
    {
        # Inverted match (-v): drop the messages whose msgstr contains the
        # gettext conflict marker and keep everything else
        local marker='#-#-#-#-#'
        msggrep -v --msgstr -F -e "$marker" -
        # alternative slightly worse implementation: msgattrib --no-fuzzy
    }
    
    # Select messages from $1 that are also in $2 but whose contents have changed
    # and emit results to stdout
    function extract_changes()
    {
        local changed_po="$1"
        local reference_po="$2"
        # 1. concatenate both catalogs, 2. keep only the entries that ended up
        # with a conflict marker, 3. use those entries as the template and fill
        # it with the translations from the first file, 4. drop entries that
        # only live in the graveyard area
        msgcat -o - "$changed_po" "$reference_po" |
            only_conflicts |
            apply_po_template -o - "$changed_po" - |
            strip_graveyard
    }
    
    # Emit only the header of $1, supports flags of msggrep
    function extract_header()
    {
        # gettext has no dedicated way to extract just the header, so we ask
        # msggrep for the strings that originate from a file called "//",
        # which should leave the header only.
        #
        # Logically msggrep --force-po -v -K -E -e '.' should return the header
        # only, too, but msggrep seems to be buggy with msgids with line feeds
        # and outputs those as well.
        local -a header_filter=(--force-po -N //)
        msggrep "${header_filter[@]}" "$@"
    }
    
    # Take file in $1 and show conflicts with colors in the file to stdout
    function show_conflicts()
    {
        # $1 - PO file whose conflicting messages are printed to stdout.
        # Prints nothing when the file has no conflicting messages.
        # All working variables are local so that the caller's globals
        # (notably OUTPUT) are left untouched.
        local po_file="$1"
        local marker='#-#-#-#-#'
        local header_lines
        local conflicts
        # Count number of lines to remove from the output (the PO header that msggrep emits first)
        header_lines=$(msggrep --force-po --color=never --msgstr -F -e "$marker" - < "$po_file" | extract_header - | wc -l)
        # tail wants line number of the first displayed line so we want +1 here:
        conflicts=$(msggrep --force-po --color --msgstr -F -e "$marker" - < "$po_file" | tail -n "+$((header_lines+1))")
        if test -n "$conflicts"
        then
            echo "----------------------------"
            printf "%s\n" "$conflicts"
            echo "----------------------------"
        fi
    }
    
    # Refuse to continue without a usable temporary directory; every failure
    # mode has its own exit status (125: no name, 126: not a directory,
    # 127: not writable)
    if test -z "$TEMP" ; then exit 125 ; fi
    if ! test -d "$TEMP" ; then exit 126 ; fi
    if ! test -w "$TEMP" ; then exit 127 ; fi
    
    # Announce the file being merged unless running silently
    if [ "$VERBOSITY" -ge 1 ]
    then
        printf "Using gettext .PO merge driver: %s ...\n" "$FILENAME"
    fi
    
    # Save the PO header of the current branch version for the final result
    extract_header -o "${TEMP}/header" "$LOCAL"
    
    ##########################################################################
    # Following parts can be run partially parallel. Every background job is
    # waited for by its PID because a bare "wait" always returns status 0,
    # which would hide failed background jobs from "set -e".
    
    # Wait for every background PID given as argument.
    # Returns 0 if all of the jobs succeeded and 1 if at least one failed.
    function wait_for_jobs()
    {
        local pid
        local failed=0
        for pid in "$@"
        do
            wait "$pid" || failed=1
        done
        return "$failed"
    }
    
    
    # Clean input files and use logical filenames for possible conflict markers:
    status "Canonicalizing input files ..."
    MERGE_JOBS=()
    msguniq --force-po -o "${TEMP}/base" --unique "${BASE}" &
    MERGE_JOBS+=("$!")
    msguniq --force-po -o "${TEMP}/local" --unique "${LOCAL}" &
    MERGE_JOBS+=("$!")
    msguniq --force-po -o "${TEMP}/other" --unique "${OTHER}" &
    MERGE_JOBS+=("$!")
    wait_for_jobs "${MERGE_JOBS[@]}"
    
    status "Computing local-changes, other-changes and unchanged ..."
    MERGE_JOBS=()
    msgcat --force-po -o - "${TEMP}/base" "${TEMP}/local" "${TEMP}/other" | without_conflicts > "${TEMP}/unchanged" &
    MERGE_JOBS+=("$!")
    extract_changes "${TEMP}/local" "${TEMP}/base" > "${TEMP}/local-changes" &
    MERGE_JOBS+=("$!")
    extract_changes "${TEMP}/other" "${TEMP}/base" > "${TEMP}/other-changes" &
    MERGE_JOBS+=("$!")
    wait_for_jobs "${MERGE_JOBS[@]}"
    
    # Messages changed on both local and other (conflicts):
    status "Computing conflicts ..."
    msgcat --force-po -o - "${TEMP}/other-changes" "${TEMP}/local-changes" | only_conflicts > "${TEMP}/conflicts"
    
    # Messages changed on local, not on other; and vice-versa:
    status "Computing local-only and other-only changes ..."
    MERGE_JOBS=()
    msgcat --force-po -o "${TEMP}/local-only" --unique "${TEMP}/local-changes" "${TEMP}/conflicts" &
    MERGE_JOBS+=("$!")
    msgcat --force-po -o "${TEMP}/other-only" --unique "${TEMP}/other-changes" "${TEMP}/conflicts" &
    MERGE_JOBS+=("$!")
    wait_for_jobs "${MERGE_JOBS[@]}"
    
    # Note: following steps require sequential processing and cannot be run in parallel
    # (each step reads the file written by the previous one)
    
    status "Computing initial merge without template ..."
    # Concatenate the four partial catalogs (unchanged, conflicts, local-only, other-only) into merge1.
    # Note that we may end up with some extra messages so we have to apply the template later
    msgcat --force-po -o "${TEMP}/merge1" "${TEMP}/unchanged" "${TEMP}/conflicts" "${TEMP}/local-only" "${TEMP}/other-only"
    
    # Create a template to only output messages that are actually needed (union of messages on local and other create the template!)
    # The template is piped to apply_po_template via stdin ("-"); translations are taken from merge1 and the result is written to merge2
    status "Computing template and applying it to merge result ..."
    msgcat --force-po -o - "${TEMP}/local" "${TEMP}/other" | apply_po_template -o "${TEMP}/merge2" "${TEMP}/merge1" -
    
    # Final merge result is merge2 with original header
    # (the header file is listed first and --use-first is given, so the saved header is the one that is kept)
    # $MSGCAT_FINAL_FLAGS is intentionally unquoted so that it is split into separate flags
    status "Fixing the header after merge ..."
    msgcat --force-po $MSGCAT_FINAL_FLAGS -o "${TEMP}/merge3" --use-first "${TEMP}/header" "${TEMP}/merge2"
    
    # Produce output file (overwrites input LOCAL file because git expects that for the results)
    status "Saving output ..."
    mv "${TEMP}/merge3" "$OUTPUT"
    
    status "Cleaning up ..."
    
    # Remove the remaining intermediate files first, then the now empty temporary directory
    rm "${TEMP}"/*
    rmdir "${TEMP}"
    
    status "Checking for conflicts in the result ..."
    
    # No conflict marker left in the final merge: report a clean merge and stop here
    if ! grep -q '#-#-#-#-#' "$OUTPUT"
    then
        status "Automatic merge completed successfully, exiting with status 0."
        exit 0
    fi
    
    # From here on the final merge is known to contain at least one conflict
    if [ "$VERBOSITY" -ge 1 ]
    then
        printf "### Conflict(s) detected ###\n"
    fi
    
    if [ "$VERBOSITY" -ge 2 ]
    then
        # Verbose diagnostics
        show_conflicts "$OUTPUT"
    fi
    
    status "Automatic merge failed, exiting with status 1."
    exit 1
    

    This variant is based on the version in the answer by @mezis to this same question, but it has the following improvements: