I am able to traverse a directory recursively (i.e., to explore all the subdirectories and files in a directory). For that, I used an answer from a related post on Stack Overflow. The snippet is below:
var fs = require("fs");
/**
 * Recursively walks `dir` and builds a tree describing everything below it.
 *
 * A directory is reported as `{path, children: [...]}`; every other entry
 * (including one whose `fs.stat` call failed) is reported as `{path}`.
 * Children are stored in the order returned by `fs.readdir`, regardless of
 * the order in which the asynchronous calls complete.
 *
 * @param {string} dir - Directory to walk.
 * @param {function(?Error, Object=)} done - Called exactly once, with
 *   `(err)` if this directory or any nested directory cannot be read, or
 *   with `(null, results)` once every entry has been visited.
 */
var tree = function(dir, done) {
  var results = {
    "path": dir,
    "children": []
  };
  fs.readdir(dir, function(err, list) {
    if (err) { return done(err); }
    var pending = list.length;
    if (!pending) { return done(null, results); }

    // Set once `done` has been invoked so late callbacks cannot call it again.
    var finished = false;

    // Reports the first failure and silences everything that follows.
    var fail = function(failure) {
      if (finished) { return; }
      finished = true;
      done(failure);
    };

    // Marks one entry as handled; reports success after the last one.
    var settle = function() {
      if (finished) { return; }
      if (!--pending) {
        finished = true;
        done(null, results);
      }
    };

    list.forEach(function(file, index) {
      var fullPath = dir + '/' + file;
      fs.stat(fullPath, function(statErr, stat) {
        if (finished) { return; }
        if (stat && stat.isDirectory()) {
          tree(fullPath, function(subErr, res) {
            // A failing subdirectory fails the whole walk instead of
            // leaving an `undefined` hole in `children`.
            if (subErr) { return fail(subErr); }
            results.children[index] = res;
            settle();
          });
        } else {
          // Entries that cannot be stat'ed are still listed as plain leaves.
          results.children[index] = {"path": fullPath};
          settle();
        }
      });
    });
  });
};
module.exports = tree;
When I run:
// Walk the directory and print the resulting tree; a traversal error is rethrown.
tree(someDirectoryPath, function(error, directoryTree) {
  if (error) {
    throw error;
  }
  console.log(directoryTree);
});
I get a sample result, such as this one:
{ path: '/Users/UserName/Desktop/1',
children:
[ { path: '/Users/UserName/Desktop/1/file1' },
{ path: '/Users/UserName/Desktop/1/file2' },
{ path: '/Users/UserName/Desktop/1/file3' },
{ path: '/Users/UserName/Desktop/1/subdir1',
children: [Object] } ] }
I am also able to hash a single file at a specific location by using the fs module's ReadStream method. The snippet for that is below:
/**
 * Checking File Integrity
 *
 * Streams the file named by the first command-line argument through a hash
 * and prints the hex digest together with a filename -> digest table.
 */
var fs = require('fs'),
    // slice() copies the user-supplied arguments without mutating process.argv
    args = process.argv.slice(2),
    path = require('path'),
    traverse = require('/Users/UserName/Desktop/tree.js'),
    crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
// Plain object used as a filename -> digest map (the keys are strings, not indices).
var hashTable = {};

if (!args[0]) {
  console.error('missing argument: path of the file to hash');
  process.exit(1);
}

var hash = crypto.createHash(algorithm);
var fileStream = fs.createReadStream(args[0]);

// Without this handler an unreadable file would surface as an uncaught 'error' event.
fileStream.on('error', function(err) {
  console.error('could not read the file: ', err.message);
  process.exitCode = 1;
});

fileStream.on('data', function(data) {
  hash.update(data);
});

// Registered once, beside the 'data' handler rather than inside it: nesting it
// would add a new listener for every chunk and none at all for an empty file.
fileStream.on('end', function() {
  var digest = hash.digest('hex');
  console.log('algorithm used: ', algorithm);
  console.log('hash for the file: ',digest);
  hashTable[args[0]] = digest;
  console.log(hashTable);
});
Here args[0] stores the location of the file to be read by the ReadStream. After hashing a specific file, the console log returned is as follows:
node fileIntegrityChecker.js hello.txt
algorithm used: sha512
hash for the file: 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043
the hashtable is: [ 'hello.txt': '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043' ]
My problem is that I tried to somehow integrate the tree module's functionality into the hash-related js file. My idea is that the program will capture the user's input as a path to a directory, and that input will be processed to traverse all the subdirectories and files of that folder. Also, the fileStream.on
method should be included in the callback from the tree module. However, I am not fully familiar with the callback mechanism, and I hope to get some insight from you.
This is what I've tried:
/**
 * Checking File Integrity
 *
 * Walks a directory with the tree module, then hashes the second entry of
 * the top-level result and records its digest in a path -> digest table.
 */
var fs = require('fs'),
    // slice() copies the user-supplied arguments without mutating process.argv
    args = process.argv.slice(2),
    path = require('path'),
    tree = require('/Users/UserName/Desktop/tree.js'),
    crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
// Plain object used as a path -> digest map (the keys are strings, not indices).
var hashTable = {};
// NOTE(review): no leading '/', so this is resolved against the current
// working directory - confirm that is intended.
var pathString = 'Users/UserName/Desktop/1';
tree(pathString, function(err, results) {
  if (err) throw err;

  // Only the second top-level entry is hashed here.
  var target = results.children[1];
  if (!target) {
    console.error('no second entry found in: ', pathString);
    return;
  }

  var hash = crypto.createHash(algorithm);
  var fileStream = fs.createReadStream(target.path);

  // Covers unreadable entries, e.g. when the selected child is a directory.
  fileStream.on('error', function(streamErr) {
    console.error('could not read the file: ', streamErr.message);
  });

  fileStream.on('data', function(data) {
    hash.update(data);
  });

  // Registered once, beside the 'data' handler rather than inside it: nesting it
  // would add a new listener for every chunk and none at all for an empty file.
  fileStream.on('end', function() {
    var digest = hash.digest('hex');
    console.log('algorithm used: ', algorithm);
    console.log('hash for the file: ',digest);
    hashTable[target.path] = digest;
    console.log('The hashtable is: ', hashTable);
  });
});
Now, I've made some progress in the sense that I don't receive an error. Basically, I achieved my goal. However, I am only able to extract one result explicitly. For some reason, I cannot figure out how to iteratively (for instance) get each child of the resulting JSON object. If that is solved, I think the problem will be completely solved.
Can you please show me how to successfully combine the module and the js file to recursively traverse all the contents of a directory and create a hash for every file in it? I need this to ultimately check whether any changes in the files occurred, based on their hashes. Thank you!
The simplest thing to do would be to generate the hash while you are already walking the directory tree. This involves updating the tree.js file as follows:
} else {
var fname = dir + "/" + file};
// put your hash generation here
generateHash(fname, function (e, hash) {
if (e) done(e);
results.children.push({"path": fname, "hash" : hash);
if (!--pending) {
done(null, results);
}
});
}
Then put your hash generation code in a function like this:
/**
 * Streams a file through a SHA-512 hash and reports the hex digest.
 *
 * @param {string} filename - Path of the file to hash.
 * @param {function(?Error, string=)} callback - Called exactly once, with
 *   `(null, digest)` after the whole file has been read, or with `(err)`
 *   if the stream reports an error (missing file, directory, no access...).
 */
function generateHash (filename, callback) {
  var algorithm = 'sha512';
  var hash = crypto.createHash(algorithm);
  var fileStream = fs.createReadStream(filename);
  // Ensures the callback is never invoked twice.
  var settled = false;

  // Without this handler a read failure would be an uncaught 'error' event
  // and the callback would never run.
  fileStream.on('error', function(err) {
    if (settled) { return; }
    settled = true;
    callback(err);
  });

  fileStream.on('data', function(data) {
    hash.update(data);
  });

  fileStream.on('end', function() {
    if (settled) { return; }
    settled = true;
    callback(null, hash.digest('hex'));
  });
}