online advertising

## Saturday, March 19, 2016

### LeetCode OJ - Minimum Height Trees

Problem:

Please find the problem here.

Analysis:

As a disclaimer, I cheated. I knew people are talking about an $O(n)$ algorithm. So I focused my energy thinking about a linear algorithm and therefore I found it.

A simple 'brute force' solution is to do a BFS starting from every possible root and record the heights, that will allow us to pick the right node. This is a $O(n^2)$ algorithm.

The question here is, how do I use a one pass algorithm to know all the heights?

Solution:

Turn out one pass is not enough for me, I used two passes. Let me describe how it works.

At first, I thought I would use BFS because that will give me the shortest path. Turn out, as the given graph is a tree, doing DFS will also the same shortest path tree (after all, there is only one path between any pair), and DFS give me more flexibility with message passing, so I chose DFS.

Imagine in a every node, we have a sign post saying the longest path we have following the direction, then we can simply read the answer by going through each node and take the maximum out of the sign post values, our goal is to compute the sign post values.

In a DFS, the sign post value can be readily computed for all but the parent path. Just record for each child what was the height of the subtree and that's all. This is the first pass in the code.

In the second pass, we compute the cost for the parent path. For that purpose, we do another pass DFS. For the root node, it has no parent and therefore it is easy to imagine a fake parent -1 with cost 0 for it. For all other node B with parent node A, the parent cost for node B is the maximum of all neighbors (including parent) of A with B itself excluded.

If we keep the full sign post, then we will need to do an actual maximum computation when we want to extract the result. Note that we only need a few operations for the sign post.

Add a cost pointing to a particular direction.
Find the maximum over all directions.
Find the maximum over all directions except one particular direction.

This is easily implemented with just keeping track of the maximum, the maximum direction, and the second maximum. That is our summary class in the code.

Without further ado, this is the code:

Code:

#include "stdafx.h"

// https://leetcode.com/problems/minimum-height-trees/

#include "LEET_MINIMUM_HEIGHT_TREES.h"
#include <list>
#include <iostream>
#include <vector>
#include <algorithm>

using namespace std;

namespace _LEET_MINIMUM_HEIGHT_TREES
{
class Solution {
private:
class summary
{
public:
int max;
int max_id;
int second_max;
summary()
{
max = second_max = 0;
max_id = -1;
}

void add(int id, int cost)
{
if (cost > max)
{
second_max = max;
max = cost;
max_id = id;
}
else if (cost > second_max)
{
second_max = cost;
}
}

int get_max()
{
return this->max;
}

int get_max_but_id(int id)
{
if (id == this->max_id)
{
return this->second_max;
}
else
{
return this->max;
}
}
};
public:
void first_pass(int parent, int node, vector<vector<int>>& adjacency_list, vector<summary>& summaries)
{
for (size_t i = 0; i < adjacency_list[node].size(); i++)
{
int neighbor = adjacency_list[node][i];
if (neighbor != parent)
{
first_pass(node, neighbor, adjacency_list, summaries);
summaries[node].add(neighbor, 1 + summaries[neighbor].get_max());
}
}
}

void second_pass(int parent, int node, int parent_cost, vector<vector<int>>& adjacency_list, vector<summary>& summaries)
{
summaries[node].add(parent, parent_cost);
for (size_t i = 0; i < adjacency_list[node].size(); i++)
{
int neighbor = adjacency_list[node][i];
if (neighbor != parent)
{
second_pass(node, neighbor, 1 + summaries[node].get_max_but_id(neighbor), adjacency_list, summaries);
}
}
}

vector<int> findMinHeightTrees(int n, vector<pair<int, int>>& edges)
{
vector<vector<int>> adjacency_list(n);
vector<summary> summaries(n);
for (size_t e = 0; e < edges.size(); e++)
{
adjacency_list[edges[e].first].push_back(edges[e].second);
adjacency_list[edges[e].second].push_back(edges[e].first);
}

first_pass(-1, 0, adjacency_list, summaries);
second_pass(-1, 0, 0, adjacency_list, summaries);

vector<int> result;
int min_max = 100000;
for (int i = 0; i < n; i++)
{
min_max = min(min_max, summaries[i].get_max());
}
for (int i = 0; i < n; i++)
{
if (summaries[i].get_max() == min_max)
{
result.push_back(i);
}
}
return result;
}
};
};

using namespace _LEET_MINIMUM_HEIGHT_TREES;

int LEET_MINIMUM_HEIGHT_TREES()
{
Solution solution;
vector<pair<int, int>> edges;

edges.push_back(pair<int, int>(0, 1));
edges.push_back(pair<int, int>(1, 3));
edges.push_back(pair<int, int>(1, 4));
edges.push_back(pair<int, int>(0, 2));
edges.push_back(pair<int, int>(4, 5));

solution.findMinHeightTrees(6, edges);
return 0;
}

## Friday, March 18, 2016

### LeetCode OJ - Count of Smaller Numbers After Self

Problem:

Please find the problem here.

Analysis:

A straightforward solution for this one is simply actually count them, that would mean a $O(n^2)$ algorithm, can we do faster?

Note that the last entry must be zero, can we build that up from the end? As a first thought, we might be able to by having a data structure that can tell me the answer, then we can incrementally update the answer. The data structure must tell the number of numbers less than a certain value, a sorted list, or binary search tree, would be ideal. That would mean a performance of $O(n \log n)$.

The only drawback would be complicated. Building a balanced binary search tree is complicated, can we simplify?

Solution:

If we wanted an $O(n \log n)$ solution, we would normally think divide and conquer. Suppose we were given the answers to the subproblems, can we solve merge the subproblem in linear time?

Yes, and that is essentially the same as merge sort.

Suppose we have an array 5 2 6 1, we have the subproblem (5 2) and (6 1), and we already have the answers to them

(5[1] 2[0]) (6[1] 1[0]) // count is put inside the square bracket, what would be the merged solution.

Let's work backwards, the final answer is

(5[2] 2[1] 6[1] 1[0])

Note one thing, the right hand side counts do not change right, of course, they should not.

The 2 has increased count by 1, and that is because it is larger than 1. So if we do the merge just like what one would do with mergesort (and fill the output array from the right hand side first), then we can follow this simple rule.

Whenever a left hand side number get filled to the output buffer, its count increase by the number of already filled numbers on the right.

Let's illustrate that with the array above, we have

Input: (5[1] 2[0]) (6[1] 1[0])
Output: ()

First step, nothing changed, the entry get filled was on the right
Input: (5[1] 2[0]) (6[1])
Output: (1[0])

Second step, count of 2 increase by 1 (that is the number of numbers added from the right hand side so far)

Input: (5[1]) (6[1])
Output: (2[1] 1[0])

Third step, count of 5 increase by 1 (again, that is the number of numbers added from the right hand side so far)

Input: () (6[1])
Output: (5[2] 2[1] 1[0])

First step, nothing changed, the entry get filled was on the right
Input: () ()
Output: (6[1] 5[2] 2[1] 1[0])

Wait, this isn't exactly the answer sequence we wanted! Don't get tricked by this, the right answer sequence is 5[2] 2[1] 6[1] 1[0], apparently, when we sort the sequence, of course, the number sequence changes, only the count stay correct!

Code:

#include "stdafx.h"

// https://leetcode.com/problems/count-of-smaller-numbers-after-self/

#include "LEET_COUNT_OF_SMALLER_NUMBERS_AFTER_SELF.h"
#include <map>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>

using namespace std;

namespace _LEET_COUNT_OF_SMALLER_NUMBERS_AFTER_SELF
{
class Solution
{
public:
vector<int> countSmaller(vector<int>& nums)
{
vector<int> counts(nums.size());
vector<int> result(nums.size());
vector<int> buffer(nums.size());
for (size_t i = 0; i < result.size(); i++)
{
counts[i] = 0;
}
countSmaller(0, nums.size(), nums, result, buffer, counts);
return counts;
}

void countSmaller(int begin, int end, vector<int>& nums, vector<int>& result, vector<int>& buffer, vector<int>& counts)
{
// At this point, we have a correct merge sort implementation
if (end - begin <= 1)
{
for (size_t i = begin; i < end; i++)
{
result[i] = i;
}
return;
}
int mid = (begin + end) / 2;
countSmaller(begin, mid, nums, buffer, result, counts);
countSmaller(mid, end, nums, buffer, result, counts);
int i = mid - 1;
int j = end - 1;
int k = end - 1;
while ((i >= begin) || (j >= mid))
{
if (i < begin)
{
result[k--] = buffer[j--];
}
else if (j < mid)
{
counts[buffer[i]] += (end - 1 - j);
result[k--] = buffer[i--];
}
else
{
// Prioritize moving the left hand side item
// to avoid counting identical element as smaller
if (nums[buffer[i]] <= nums[buffer[j]])
{
counts[buffer[i]] += (end - 1 - j);
result[k--] = buffer[i--];
}
else
{
result[k--] = buffer[j--];
}
}
}
/*cout << "After merge" << endl;
for (int i = begin; i < end; i++)
{
cout << nums[result[i]] << " ";
}
cout << endl;
for (int i = begin; i < end; i++)
{
cout << counts[result[i]] << " ";
}
cout << endl;*/
}
};
};

using namespace _LEET_COUNT_OF_SMALLER_NUMBERS_AFTER_SELF;

int LEET_COUNT_OF_SMALLER_NUMBERS_AFTER_SELF()
{
Solution solution;
int input_array[] = { 5, 2, 6, 1 };
vector<int> input(input_array, input_array + _countof(input_array));
vector<int> result = solution.countSmaller(input);
for (size_t i = 0; i < result.size(); i++)
{
cout << result[i] << " ";
}
cout << endl;
return 0;
}

### LeetCode OJ - Unique Binary Search Trees II

Problem:

Please find the problem here.

Analysis:

We already know the number of trees is the Catalan numbers in the last problem, so we cannot hope for good performance anyway since the Catalan number is exponential. With the exponential sized output, the best we could do is just to minimize the constants.

Solution:

With the observation that the performance is going to be exponential, the simplest above is simply brute force and expand the recurrence to actual tree instead of just a number.

For all possible roots, generate all possible left subtrees and all possible right subtrees using recursion, for each pair create a new tree and output, that's it.

Code:

#include "stdafx.h"

// https://leetcode.com/problems/unique-binary-search-trees-ii/

#include "LEET_UNIQUE_BINARY_SEARCH_TREES_II.h"
#include <map>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>

using namespace std;

namespace _LEET_UNIQUE_BINARY_SEARCH_TREES_II
{
struct TreeNode
{
int val;
TreeNode *left;
TreeNode *right;
TreeNode(int x) : val(x), left(NULL), right(NULL) {}
};

class Solution {
public:
vector<TreeNode*> generateTrees(int min, int max)
{
vector<TreeNode*> result;
if (max < min)
{
result.push_back(NULL);
return result;
}
for (int i = min; i <= max; i++)
{
vector<TreeNode*> leftSubtrees = generateTrees(min, i - 1);
vector<TreeNode*> rightSubtrees = generateTrees(i + 1, max);

for (size_t j = 0; j < leftSubtrees.size(); j++)
{
for (size_t k = 0; k < rightSubtrees.size(); k++)
{
TreeNode* root = new TreeNode(i);
root->left = leftSubtrees[j];
root->right = rightSubtrees[k];
result.push_back(root);
}
}
}
return result;
}
vector<TreeNode*> generateTrees(int n)
{
if (n == 0)
{
vector<TreeNode*> result;
return result;
}
return generateTrees(1, n);
}
};

void ShowTree(TreeNode* tree)
{
if (tree == NULL)
{
cout << "#";
}
else
{
cout << "(" << tree->val;
cout << ", ";
ShowTree(tree->left);
cout << ", ";
ShowTree(tree->right);
cout << ")";
}
}
};

using namespace _LEET_UNIQUE_BINARY_SEARCH_TREES_II;

int LEET_UNIQUE_BINARY_SEARCH_TREES_II()
{
Solution solution;
vector<TreeNode*> trees = solution.generateTrees(3);
for (size_t i = 0; i < trees.size(); i++)
{
ShowTree(trees[i]);
cout << endl;
}
return 0;
}

### LeetCode OJ - Linked List Cycle II

Problem:

Please find the problem here.

Analysis:

In the last problem, we already solved the problem to detect whether or not there is a cycle using a slow pointer and a fast pointer. We get a point in the cycle, but that may not be the starting point of the cycle.

The key observation here is if we have a complete cycle instead, and we start from a point inside the cycle, it always meet at the same starting point.

Let's say we have a cycle like 1 -> 2 -> 3 -> 4 -> 5 -> 1, and the both pointers start from 1, we have the pointer sequence

1, 1
2, 3
3, 5
4, 2
5, 4
1, 1

The logic here is simple, the fast pointer make two cycles when the slow pointer make one.

Solution:

Now here is the interesting thing, for an arbitrary list with cycle, we already know a point on the cycle, the meeting point, what if we trace back the path?

Let say, we have the list 1 -> 2 -> 3 -> 4 - >5 -> 6 -> 3, so the forward pointer sequence would be

1, 1
2, 3
3, 5
4, 3
5, 5

Of course, the backward sequence would simply be reading the sequence above backward, at some point, the pointers would leave the cycle. With the observation above, we know, if we make the pointers to stay on the cycle (instead of going back to 1), it will eventually go back to the meeting point. Let us illustrate the reversed path here

Original (Changed)
5, 5
4, 3
3, 5
2(3), 3(3)
1(5), 1(5)

Notice in the illustration above, what is more true, is that it takes the same amount of time to get back to the original meeting point. This is always true because one can think of projecting the initial portion of the path back to the cycle, they started at the same place and therefore the same must be identical.

With that, we can come up with this simple algorithm.

Code:

#include "stdafx.h"

// https://leetcode.com/problems/linked-list-cycle-ii/

#include "LEET_LINKED_LIST_CYCLE_II.h"
#include <map>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>

using namespace std;

namespace _LEET_LINKED_LIST_CYCLE_II
{
struct ListNode
{
int val;
ListNode *next;
ListNode(int x) : val(x), next(NULL) {}
};
class Solution
{
public:
ListNode* detectCycle(ListNode *head)
{
if (head == NULL)
{
return NULL;
}

ListNode* slow = head;
ListNode* fast = head;
while (true)
{
if (slow->next == NULL)
{
return NULL;
}
else
{
slow = slow->next;
}
if (fast->next == NULL)
{
return NULL;
}
else
{
fast = fast->next;
}
if (fast->next == NULL)
{
return NULL;
}
else
{
fast = fast->next;
}
if (slow == fast)
{
break;
}
}
fast = head;
while (slow != fast)
{
slow = slow->next;
fast = fast->next;
}
return slow;
}
};
};

using namespace _LEET_LINKED_LIST_CYCLE_II;

int LEET_LINKED_LIST_CYCLE_II()
{
Solution solution;
ListNode a(0);
ListNode b(1);
ListNode c(2);
ListNode d(3);
ListNode e(4);
a.next = &b;
b.next = &c;
c.next = &d;
d.next = &e;
e.next = &b;
cout << (solution.detectCycle(&a) == &b) << endl;
return 0;
}

## Thursday, March 17, 2016

### LeetCode OJ - Max Points on a Line

Problem:

Please find the problem here.

Analysis:

A straightforward way to solve this problem is to find all straight lines by going through all pairs. For each line, find how many points are on them, and that would be it. However, it is going to take $O(n^3)$ time, can we do faster?

Solution:

The key observation here is that when we computes the line between two points, if it happens that 3 points (let say, $a$, $b$, $c$ are on the same line, then when we compute the line between pair $a$, $c$, we would know it is on the same line between the pair $a$, $b$ do. So by a carefully designed canonical representation of a line, then we can put all these lines in a hash table and associate all the points with it. That drives our time complexity down to $O(n^2)$.

The canonical representation is easy. Just find the point slope form. To use hash function, I need to make sure the representation is exact, so I keep them numbers as rational numbers, and simplify to their lowest terms, and make sure denominator is non-negative.

Vertical lines need to be special cased as they have no finite slope, so they are.

This is the first time I used a custom hash for unordered_map<>, special thanks to xianrenb and Rudolf on StackOverflow for helping out with the mysterious compiler error.

Code:

#include "stdafx.h"

// https://leetcode.com/problems/max-points-on-a-line/

#include "LEET_MAX_POINTS_ON_A_LINE.h"
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>

using namespace std;

namespace _LEET_MAX_POINTS_ON_A_LINE
{
struct Point
{
int x;
int y;
Point() : x(0), y(0) {}
Point(int a, int b) : x(a), y(b) {}
};

struct SlantedLine
{
public:
int slope_numerator;
int slope_denominator;
int intercept_numerator;
int intercept_denominator;
};

struct SlantedLineHash
{
size_t operator()(const SlantedLine& k) const
{
return k.slope_numerator ^ k.slope_denominator ^ k.intercept_numerator ^ k.intercept_denominator;
}
};

struct SlantedLineEqual
{
size_t operator()(const SlantedLine& line1, const SlantedLine& line2) const
{
return (line1.slope_numerator == line2.slope_numerator) && (line1.slope_denominator == line2.slope_denominator) && (line1.intercept_numerator == line2.intercept_numerator) && (line1.intercept_denominator == line2.intercept_denominator);
}
};

class Solution
{
public:
int maxPoints(vector<Point>& points)
{
if (points.size() == 0)
{
return 0;
}

unordered_map<SlantedLine, unordered_set<int>, SlantedLineHash, SlantedLineEqual> slantedLineToPointsMap;
unordered_map<int, unordered_set<int>> verticalLineToPointsMap;
for (size_t i = 0; i < points.size(); i++)
{
for (size_t j = i + 1; j < points.size(); j++)
{
int x1 = points[i].x;
int y1 = points[i].y;
int x2 = points[j].x;
int y2 = points[j].y;

if (x1 == x2)
{
unordered_map<int, unordered_set<int>>::iterator probe = verticalLineToPointsMap.find(x1);
if (probe == verticalLineToPointsMap.end())
{
unordered_set<int> points;
points.insert(i);
points.insert(j);
verticalLineToPointsMap.insert(pair<int, unordered_set<int>>(x1, points));
}
else
{
probe->second.insert(i);
probe->second.insert(j);
}
}
else
{
int slope_numerator = y2 - y1;
int slope_denominator = x2 - x1;
int intercept_numerator = y1 * x2 - x1 * y2;
int intercept_denominator = x2 - x1;
simplify_fraction(&slope_numerator, &slope_denominator);
simplify_fraction(&intercept_numerator, &intercept_denominator);
SlantedLine line;
line.slope_numerator = slope_numerator;
line.slope_denominator = slope_denominator;
line.intercept_numerator = intercept_numerator;
line.intercept_denominator = intercept_denominator;
unordered_map<SlantedLine, unordered_set<int>, SlantedLineHash, SlantedLineEqual>::iterator probe = slantedLineToPointsMap.find(line);
if (probe == slantedLineToPointsMap.end())
{
unordered_set<int> points;
points.insert(i);
points.insert(j);
slantedLineToPointsMap.insert(pair<SlantedLine, unordered_set<int>>(line, points));
}
else
{
probe->second.insert(i);
probe->second.insert(j);
}
}
}
}

size_t maxPoints = 0;
for (unordered_map<int, unordered_set<int>>::iterator i = verticalLineToPointsMap.begin(); i != verticalLineToPointsMap.end(); i++)
{
maxPoints = max(maxPoints, i->second.size());
}
for (unordered_map<SlantedLine, unordered_set<int>, SlantedLineHash, SlantedLineEqual>::iterator i = slantedLineToPointsMap.begin(); i != slantedLineToPointsMap.end(); i++)
{
maxPoints = max(maxPoints, i->second.size());
}

return maxPoints;
}
private:
void simplify_fraction(int* numerator, int* denominator)
{
int common_factor = gcd(*numerator, *denominator);
*numerator /= common_factor;
*denominator /= common_factor;
if (*denominator < 0)
{
*numerator *= -1;
*denominator *= -1;
}
}

int gcd(int a, int b)
{
if (a < 0)
{
return gcd(-a, b);
}
else if (b < 0)
{
return gcd(a, -b);
}
else if (b > a)
{
return gcd(b, a);
}
else
{
if (b == 0)
{
return a;
}
else
{
return gcd(b, a % b);
}
}
}
};
};

using namespace _LEET_MAX_POINTS_ON_A_LINE;

int LEET_MAX_POINTS_ON_A_LINE()
{
Solution solution;
Point p[] = { Point(84, 250), Point(0, 0), Point(1, 0), Point(0, -70), Point(0, -70), Point(1, -1), Point(21, 10), Point(42, 90), Point(-42, -230) };
cout << solution.maxPoints(vector<Point>(p, p+ _countof(p))) << endl;
return 0;
}