I'm implementing segment tree from an array of data, and I also want to maintaining the max/min of the tree while updating a range of data. Here is my initial approach following this tutorial http://p--np.blogspot.com/2011/07/segment-tree.html.
Unfortunately it doesn't work at all, the logic makes sense to me, but I'm a little confused about b
and e
, I wonder is this the range of the data
array? or it's the actual range of the tree? From what I understand, the max_segment_tree[1]
should hold the max
of the range [1, MAX_RANGE]
while min_segment_tree[1]
should hold the min
of the range [1, MAX_RANGE]
.
int data[MAX_RANGE];
int max_segment_tree[3 * MAX_RANGE + 1];
int min_segment_tree[3 * MAX_RANGE + 1];
void build_tree(int position, int left, int right) {
if (left > right) {
return;
}
else if (left == right) {
max_segment_tree[position] = data[left];
min_segment_tree[position] = data[left];
return;
}
int middle = (left + right) / 2;
build_tree(position * 2, left, middle);
build_tree(position * 2 + 1, middle + 1, right);
max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]);
min_segment_tree[position] = min(min_segment_tree[position * 2], min_segment_tree[position * 2 + 1]);
}
void update_tree(int position, int b, int e, int i, int j, int value) {
if (b > e || b > j || e < i) {
return;
}
if (i <= b && j >= e) {
max_segment_tree[position] += value;
min_segment_tree[position] += value;
return;
}
update_tree(position * 2 , b , (b + e) / 2 , i, j, value);
update_tree(position * 2 + 1 , (b + e) / 2 + 1 , e , i, j, value);
max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]);
min_segment_tree[position] = min(min_segment_tree[position * 2], min_segment_tree[position * 2 + 1]);
}
EDIT Adding test cases:
#include <iostream>
#include <iomanip>
#include <vector>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <stack>
#include <deque>
#include <queue>
#include <fstream>
#include <functional>
#include <numeric>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cassert>
using namespace std;
const int MAX_RANGE = 20;
int data[MAX_RANGE];
int max_segment_tree[2 * MAX_RANGE];
int min_segment_tree[2 * MAX_RANGE];
int added_to_interval[2 * MAX_RANGE] = {0};
void update_bruteforce(int x, int y, int z, int &smallest, int &largest) {
for (int i = x - 1; i < y; ++i) {
data[i] += z;
}
// update min/max
smallest = data[0];
largest = data[0];
for (int i = 0; i < MAX_RANGE; ++i) {
if (data[i] < smallest) {
smallest = data[i];
}
if (data[i] > largest) {
largest = data[i];
}
}
}
void build_tree(int position, int left, int right) {
if (left > right) {
return;
}
else if (left == right) {
max_segment_tree[position] = data[left];
min_segment_tree[position] = data[left];
return;
}
int middle = (left + right) / 2;
build_tree(position * 2, left, middle);
build_tree(position * 2 + 1, middle + 1, right);
max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]);
min_segment_tree[position] = min(min_segment_tree[position * 2], min_segment_tree[position * 2 + 1]);
}
void update_tree(int position, int b, int e, int i, int j, int value) {
if (b > e || b > j || e < i) {
return;
}
if (i <= b && e <= j) {
max_segment_tree[position] += value;
min_segment_tree[position] += value;
added_to_interval[position] += value;
return;
}
update_tree(position * 2 , b , (b + e) / 2 , i, j, value);
update_tree(position * 2 + 1 , (b + e) / 2 + 1 , e , i, j, value);
max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]) + added_to_interval[position];
min_segment_tree[position] = min(min_segment_tree[position * 2], min_segment_tree[position * 2 + 1]) + added_to_interval[position];
}
void update(int x, int y, int value) {
// memset(added_to_interval, 0, sizeof(added_to_interval));
update_tree(1, 0, MAX_RANGE - 1, x - 1, y - 1, value);
}
namespace unit_test {
void test_show_data() {
for (int i = 0; i < MAX_RANGE; ++i) {
cout << data[i] << ", ";
}
cout << endl << endl;
}
void test_brute_force_and_segment_tree() {
// arrange
int number_of_operations = 100;
for (int i = 0; i < MAX_RANGE; ++i) {
data[i] = i + 1;
}
build_tree(1, 0, MAX_RANGE - 1);
// act
int operation;
int x;
int y;
int z;
int smallest = 1;
int largest = MAX_RANGE;
// assert
while (number_of_operations--) {
operation = rand() % 1;
x = 1 + rand() % MAX_RANGE;
y = x + (rand() % (MAX_RANGE - x + 1));
z = 1 + rand() % MAX_RANGE;
if (operation == 0) {
z *= 1;
}
else {
z *= -1;
}
cout << "left, right, value: " << x - 1 << ", " << y - 1 << ", " << z << endl;
update_bruteforce(x, y, z, smallest, largest);
update(x, y, z);
test_show_data();
cout << "correct:\n";
cout << "\tsmallest = " << smallest << endl;
cout << "\tlargest = " << largest << endl;
cout << "possibly correct:\n";
cout << "\tsmallest = " << min_segment_tree[1] << endl;
cout << "\tlargest = " << max_segment_tree[1] << endl;
cout << "\n--------------------------------------------------------------\n";
cin.get();
}
}
}
int main() {
unit_test::test_brute_force_and_segment_tree();
}
You need to store separately the max/min for each interval, AND what values have been added to it (just their sum). Here's how it could go wrong:
Suppose we're building a tree (I'll only show the min tree here) for the array [5, 1, 3, 7]. The tree looks like this:
1
1 3
5 1 3 7
Then we add 1 to the whole interval. The tree looks like this:
2
1 3
5 1 3 7
because the propagation has stopped on the first node since the updated interval covers it completely.
Then add 1 to the range [0-1]. This range does not cover the whole interval of the first node, so we update the children, and then set the min for the whole interval (that is, the value of the first node) to be the min of nodes 2 and 3. Here is the resulting tree:
2
2 3
5 1 3 7
And here is where it got wrong - there is no element 2 in the array, yet the tree claims that the min of the whole array is 2. This is happening because the lower levels of the tree never actually get the information that their values have been increased - the second node isn't aware of the fact that its values are not [5, 1] but rather [6, 2].
In order to make it work correctly, you can add a third array that keeps the values that have been added to whole intervals - say, int added_to_interval[3 * MAX_RANGE + 1];
. Then, when you're updating a whole interval (the case where i <= b && j >= e
), you also have to increment added_to_interval[position]
with value
. Also, when going up the tree to update the nodes from the values of the children, you also have to add that has been added to the whole interval (e.g. max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]) + added_to_interval[position];
).
EDIT:
Here are the changes to the code to make it working:
if (i <= b && j >= e) {
max_segment_tree[position] += value;
min_segment_tree[position] += value;
added_to_interval[position] += value;
return;
}
...
update_tree(position * 2 , b , (b + e) / 2 , i, j, value);
update_tree(position * 2 + 1 , (b + e) / 2 + 1 , e , i, j, value);
max_segment_tree[position] = max(max_segment_tree[position * 2], max_segment_tree[position * 2 + 1]) + added_to_interval[position];
min_segment_tree[position] = min(min_segment_tree[position * 2], min_segment_tree[position * 2 + 1]) + added_to_interval[position];
I haven't tested it extensively - I'm leaving that to you, but I tried a bunch of examples that seemed to work correctly.
Also, I don't think you need 3 * MAX_RANGE + 1 elements in the arrays - 2 * MAX_RANGE or something like that should be enough.