HackerRank/Heaps/Find the Running Median

Problem Summary

Given N integers in order, for each ith integer, add it to a running list of integers, find the median of the updated list and print the median. (1 ≤ N ≤ 10^5)

Solution 1

The median is the number in the middle of a sorted list. It is unnecessary to sort the entire list to find the median. Instead, we can split the list into two groups of numbers, each containing about half of the list and the number in the first greater than the numbers in the other. Then we only need the biggest number of the first group and the smallest of the second group.

We can keep two heaps A and B , which have the following properties:

  1. A contains the bigger half of current list and A is a minimum heap.
  2. B contains the smaller half of current list and B is a maximum heap.
  3. The absolute difference between the sizes of A and B is less than 2.

For each ith integer, nums[i], we push it into A, if A is empty or nums[i] ≥ A.top() . Else, we push it into B.
Then we balance the two heaps, making sure the property 3 still holds.
If i is odd, then the top of the bigger heap is the current median.
If i is even, then the average of the sum of the tops of A and B is the current median.

The time complexity is O(NlogN).

Solution 2

We can use a heap, or a binary search tree to store the list.
For each node i, we use a variable “sum” to denote the number of nodes in the subtree with i as root. With this we can find the nth number of the list in O(logN) time.

For each ith integer, we insert it into this BST, updating nodes along the way. Then, if i is odd, we find the (i+1)/2 th number in the list. Else, we find the i/2 th and the i/2+1 th number, and calculate the average.

The time complexity is also O(NlogN), but it might be harder to code.

Code for Solution 1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <cmath>
using namespace std;
struct NumsHeap
{
bool is_min_heap;
int size, nums[100010];
NumsHeap(bool flag) { is_min_heap = flag; size = 0; }
void push(int x);
void pop();
int top() { return size > 0 ? nums[1] : -1; }
} min_heap(true), max_heap(false);
void NumsHeap::push(int x)
{
nums[++size] = x;
int i = size, j = i>>1;
while (j>0)
{
if (is_min_heap ^ (nums[j] > nums[i]))
break;
int tmp = nums[i]; nums[i] = nums[j]; nums[j] = tmp;
i = j;
j = i>>1;
}
}
void NumsHeap::pop()
{
--size;
if (size == 0)
return;
nums[1] = nums[size+1];
int i = 1, j = i<<1;
while (j<=size)
{
if (is_min_heap && (nums[i] <= nums[j] && (j==size || (j<size && nums[i] <= nums[j+1]))))
break;
if (!is_min_heap && (nums[i] >= nums[j] && (j==size || (j<size && nums[i] >= nums[j+1]))))
break;
if (j<size && (is_min_heap ^ (nums[j+1] > nums[j])))
++j;
int tmp = nums[i]; nums[i] = nums[j]; nums[j] = tmp;
i = j;
j = i<<1;
}
}
void rebalance()
{
if (min_heap.size > max_heap.size + 1)
{
max_heap.push(min_heap.top());
min_heap.pop();
}
else
if (max_heap.size > min_heap.size + 1)
{
min_heap.push(max_heap.top());
max_heap.pop();
}
}
int main()
{
int n;
scanf("%d",&n);
for (int i = 1; i <= n; ++i)
{
int j;
scanf("%d",&j);
if (min_heap.size == 0 || j >= min_heap.top())
min_heap.push(j);
else
max_heap.push(j);
rebalance();
if (i&1)
{
if (min_heap.size > max_heap.size)
printf("%.1f\n",1.0 * min_heap.top());
else
printf("%.1f\n",1.0 * max_heap.top());
}
else
printf("%.1f\n",0.5 * (min_heap.top() + max_heap.top()));
}
return 0;
}

Code for Solution 2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <cmath>
using namespace std;
int heap_size = 0;
struct Node
{
int sum,val,l,r;
Node() { sum = 0; val = l = r = -1; }
} nums_heap[100010];
void insert_node(int x)
{
if (heap_size == 0)
{
heap_size = 1;
nums_heap[1].sum = 1;
nums_heap[1].val = x;
return;
}
int i = 1;
while (i>0)
{
++ nums_heap[i].sum;
if (x < nums_heap[i].val)
{
if (nums_heap[i].l == -1)
{
nums_heap[i].l = ++heap_size;
i = nums_heap[i].l;
nums_heap[i].sum = 1;
nums_heap[i].val = x;
return;
}
i = nums_heap[i].l;
}
else
{
if (nums_heap[i].r == -1)
{
nums_heap[i].r = ++heap_size;
i = nums_heap[i].r;
nums_heap[i].sum = 1;
nums_heap[i].val = x;
return;
}
i = nums_heap[i].r;
}
}
}
int get_nth(int n)
{
int i = 1;
while (i>0)
{
if (nums_heap[i].l == -1 && n==1)
return nums_heap[i].val;
if (nums_heap[i].l != -1 && n == nums_heap[nums_heap[i].l].sum +1)
return nums_heap[i].val;
if (nums_heap[i].l != -1 && nums_heap[nums_heap[i].l].sum >= n)
{
i = nums_heap[i].l;
continue;
}
if (nums_heap[i].l != -1)
n = n - 1 - nums_heap[nums_heap[i].l].sum;
else
--n;
i = nums_heap[i].r;
}
return -1;
}
int main()
{
int n;
scanf("%d",&n);
for (int i = 1; i <= n; ++i)
{
int j;
scanf("%d",&j);
insert_node(j);
if (i&1)
printf("%.1f\n",1.0*get_nth((i+1)/2));
else
printf("%.1f\n",0.5*(get_nth(i/2)+get_nth(i/2+1)));
}
return 0;
}