I am trying to write a Node.js program that anonymizes the Word document at a given path, as part of a larger project. I have already unzipped the .docx file and edited its document.xml file. All I need to do now is recompress it.
I have looked into using Archiver, but the problem is that it zips the folder into a .zip file, and when I try to convert the result to a .docx, the file is corrupted.
// Reads the extracted document.xml, masks the text of the first plain <w:t>
// element, writes the file back, and then zips the extracted folder into
// ./anonymized_submission.docx.
// NOTE(review): `fs`, `archiver` and `reject` are not defined in this excerpt;
// presumably it runs inside a `new Promise((resolve, reject) => ...)` executor
// with both modules already required. Confirm against the enclosing code.
fs.readFile('./extracted_doc/word/document.xml', 'utf8', (err, data) => {
  // Return after rejecting: `data` is undefined when the read fails.
  if (err) return reject(err);

  const openTag = '<w:t>';
  const closeTag = '</w:t>';
  const start = data.indexOf(openTag);
  // Look for the closing tag only after the opening tag, so a `</w:t>` that
  // belongs to an earlier element (e.g. one with attributes) is never paired
  // with this opening tag.
  const end = start === -1 ? -1 : data.indexOf(closeTag, start + openTag.length);

  // Splice by index instead of String.prototype.replace: replace() substitutes
  // the first occurrence of the sliced text anywhere in the document, which can
  // hit markup instead of the intended text (and an empty slice would insert
  // the mask at the very start of the XML).
  const result =
    start === -1 || end === -1
      ? data // nothing to mask; leave the document untouched
      : data.slice(0, start + openTag.length) + 'XXXXXXXXXXXXXXXXXX' + data.slice(end);

  fs.writeFile('./extracted_doc/word/document.xml', result, (err) => {
    if (err) return reject(err);

    // Zip the folder back into a .docx.
    const output = fs.createWriteStream('./anonymized_submission.docx');
    const archive = archiver('zip');
    archive.on('error', function (err) {
      throw err;
    });
    archive.pipe(output);
    // NOTE: the second argument is the destination path inside the archive, so
    // every entry ends up under an "extracted_doc/" folder in the zip.
    archive.directory('./extracted_doc', 'extracted_doc');
    archive.finalize();
  });
});
Here's a potential solution to your problem; I've tested it and it works for me. It replaces the text of the first `<w:t>` element with 'XXXX...'.
The main issue with your existing code was that it created a root directory 'extracted_doc' inside the .zip file, with the document's contents nested inside it. This is not what Word expects: it expects the document structure to be at the root of the archive.
I've created the zipDirectory function to work around this. The main goal here is to preserve the directory structure of the archive.
const archiver = require("archiver");
const fs = require("fs");
// Reads the extracted document.xml, masks the text of the first plain <w:t>
// element, writes the file back, and then re-zips the extracted folder into
// ./anonymized_submission.docx via zipDirectory().
fs.readFile('./extracted_doc/word/document.xml', 'utf8', (err, data) => {
  // There is no enclosing Promise in this script, so there is no `reject` to
  // call; surface the real I/O error instead of a ReferenceError.
  if (err) throw err;

  const openTag = '<w:t>';
  const closeTag = '</w:t>';
  const start = data.indexOf(openTag);
  // Look for the closing tag only after the opening tag, so a `</w:t>` that
  // belongs to an earlier element (e.g. one with attributes) is never paired
  // with this opening tag.
  const end = start === -1 ? -1 : data.indexOf(closeTag, start + openTag.length);

  // Splice by index instead of String.prototype.replace: replace() substitutes
  // the first occurrence of the sliced text anywhere in the document, which can
  // hit markup instead of the intended text (and an empty slice would insert
  // the mask at the very start of the XML).
  const result =
    start === -1 || end === -1
      ? data // nothing to mask; leave the document untouched
      : data.slice(0, start + openTag.length) + 'XXXXXXXXXXXXXXXXXX' + data.slice(end);

  fs.writeFile('./extracted_doc/word/document.xml', result, (err) => {
    if (err) throw err;
    zipDirectory('./extracted_doc/', './anonymized_submission.docx');
  });
});
/**
 * Zips the contents of a directory into a single archive file, placing the
 * directory's contents at the root of the archive (no wrapping folder).
 *
 * @param {string} inputDir - Directory whose contents are added to the archive.
 * @param {string} outputFile - Path of the archive file to create.
 * @returns {Promise<void>} Resolves once the output stream has closed, i.e. the
 *   archive has been fully written; rejects on an archiver or stream error.
 *   Callers that ignore the returned promise still get the archive written.
 */
function zipDirectory(inputDir, outputFile) {
  return new Promise((resolve, reject) => {
    const archive = archiver('zip');
    const output = fs.createWriteStream(outputFile);

    // 'close' fires after the file descriptor is closed, so the file is complete.
    output.on('close', () => resolve());
    output.on('error', reject);
    archive.on('error', reject);

    archive.pipe(output);
    // Passing `false` as the destination path is archiver's documented way to
    // put the directory's contents at the root of the archive, which is the
    // layout Word expects for a .docx.
    archive.directory(inputDir, false);
    // finalize() may return a promise depending on the archiver version; route
    // any rejection to this promise so it is not left unhandled.
    Promise.resolve(archive.finalize()).catch(reject);
  });
}