# ======================================================================
# Problem_1.py
# ======================================================================
"""
1168. Optimize Water Distribution in a Village
https://leetcode.com/problems/optimize-water-distribution-in-a-village/description/

There are n houses in a village. We want to supply water for all the houses by
building wells and laying pipes.

For each house i, we can either build a well inside it directly with cost
wells[i - 1] (note the -1 due to 0-indexing), or pipe in water from another
well to it. The costs to lay pipes between houses are given by the array pipes
where each pipes[j] = [house1j, house2j, costj] represents the cost to connect
house1j and house2j together using a pipe. Connections are bidirectional, and
there could be multiple valid connections between the same two houses with
different costs.

Return the minimum total cost to supply water to all houses.

Example 1:
Input: n = 3, wells = [1,2,2], pipes = [[1,2,1],[2,3,1]]
Output: 3
Explanation: The image shows the costs of connecting houses using pipes.
The best strategy is to build a well in the first house with cost 1 and connect
the other houses to it with cost 2 so the total cost is 3.

Example 2:
Input: n = 2, wells = [1,1], pipes = [[1,2,1],[1,2,2]]
Output: 2
Explanation: We can supply water with cost two using one of the three options:
Option 1:
  - Build a well inside house 1 with cost 1.
  - Build a well inside house 2 with cost 1.
The total cost will be 2.
Option 2:
  - Build a well inside house 1 with cost 1.
  - Connect house 2 with house 1 with cost 1.
The total cost will be 2.
Option 3:
  - Build a well inside house 2 with cost 1.
  - Connect house 1 with house 2 with cost 1.
The total cost will be 2.
Note that we can connect houses 1 and 2 with cost 1 or with cost 2 but we will
always choose the cheapest option.

Constraints:
2 <= n <= 10^4
wells.length == n
0 <= wells[i] <= 10^5
1 <= pipes.length <= 10^4
pipes[j].length == 3
1 <= house1_j, house2_j <= n
0 <= cost_j <= 10^5
house1_j != house2_j

Solution:
1. Build a minimum cost spanning tree using Kruskal's algorithm
We connect each house to a virtual node 0 using the well cost as an edge. Then
we collect all those edges along with the real pipes into one list and sort by
cost. Using Union-Find, we keep adding the cheapest edges that connect new
components until all are connected.

The most important takeaway here is Union-Find. It uses an array to record the
group membership (representative) of every node across the disjoint sets.

https://youtu.be/MaHBeXA3jI0?t=1308 (explanation and dry run)
https://youtu.be/MaHBeXA3jI0?t=2581 (code)
https://youtu.be/wU6udHRIkcc?t=667 (dry run of unionization)

Time: O((N+M) log (N+M)), N = num houses, M = num pipes
Space: O(N+M) (edges[] is O(N+M) and uf[] is O(N))
(Thus N + M = total no. of edges in the graph)

More generally, if there are E edges and V vertices in the graph,
Time: O(E log E), Space: O(E+V)

2. Min-Heap (Prim's algorithm)
We connect each house to a virtual node 0 using the well cost as an edge.
Use a priority queue (min-heap) keyed on edge cost to always pick the cheapest
edge connecting a new house. Keep adding edges until all nodes are visited, and
sum their costs for the answer.
https://youtu.be/4ZlRH0eK-qQ?t=694
Time: O((N+M) log (N+M)), N = num houses, M = num pipes
Space: O(N+M) (adjacency list and heap are O(N+M), visited[] is O(N))
"""
import heapq
from collections import defaultdict, deque
from operator import itemgetter
from typing import List


def minCostToSupplyWater_Kruskal(
    n: int, wells: List[int], pipes: List[List[int]]
) -> int:
    """Return the minimum cost to supply all houses, using Kruskal's algorithm.

    Every house i (1-based) gets an edge [0, i, wells[i - 1]] to a virtual
    node 0; those edges plus ``pipes`` are sorted by cost and merged with
    union-find.

    Args:
        n: number of houses.
        wells: wells[i - 1] is the cost of a well in house i.
        pipes: [house1, house2, cost] triples (bidirectional).

    Returns:
        The total cost of the accepted edges; 0 when ``n`` is 0.
    """
    if n == 0:
        return 0  # nothing to supply (previously fell through and returned None)

    uf = list(range(n + 1))  # virtual well node: index 0, houses: index > 0

    def find(node: int) -> int:
        # Iterative on purpose: parent chains can be as long as n, which would
        # overflow the interpreter's recursion limit with a recursive find.
        root = node
        while uf[root] != root:
            root = uf[root]
        # Path compression: point every node on the walked path at the root.
        while uf[node] != root:
            next_node = uf[node]
            uf[node] = root
            node = next_node
        return root

    edges = [[0, house, cost] for house, cost in enumerate(wells, start=1)]
    edges.extend(pipes)
    edges.sort(key=itemgetter(2))  # cheapest edge first

    total_cost = 0
    edges_used = 0
    for x, y, cost in edges:
        px, py = find(x), find(y)
        if px == py:
            continue  # this edge would form a loop, so skip it
        uf[px] = py  # unionize: both endpoints now share a representative
        total_cost += cost
        edges_used += 1
        if edges_used == n:
            break  # a spanning tree over n + 1 nodes has exactly n edges
    return total_cost


def minCostToSupplyWater_MinHeap(
    n: int, wells: List[int], pipes: List[List[int]]
) -> int:
    """Return the minimum cost to supply all houses, using Prim's algorithm.

    Heap entries are (cost, node) tuples so the heap is ordered by edge cost;
    ordering by node id instead does not yield a minimum spanning tree
    (e.g. n=2, wells=[5, 1], pipes=[[1, 2, 1]] must give 2, not 6).

    Args:
        n: number of houses.
        wells: wells[i - 1] is the cost of a well in house i.
        pipes: [house1, house2, cost] triples (bidirectional).

    Returns:
        The sum of the costs of the edges that first reach each node.
    """
    adjacency = defaultdict(list)  # node -> list of (neighbour, cost)
    for house1, house2, cost in pipes:
        adjacency[house1].append((house2, cost))
        adjacency[house2].append((house1, cost))
    for house in range(1, n + 1):
        well_cost = wells[house - 1]
        adjacency[0].append((house, well_cost))
        adjacency[house].append((0, well_cost))

    heap = [(0, 0)]  # (cost, node): start from the virtual node at no cost
    visited = [False] * (n + 1)
    unvisited = n + 1
    result = 0

    while heap and unvisited:
        cost, node = heapq.heappop(heap)
        if visited[node]:
            continue  # stale entry: node was already reached more cheaply

        visited[node] = True
        unvisited -= 1
        result += cost

        for neighbour, edge_cost in adjacency[node]:
            if not visited[neighbour]:
                heapq.heappush(heap, (edge_cost, neighbour))

    return result


def run_minCostToSupplyWater():
    """Run both solvers on sample inputs and print results; stop at first failure."""
    tests = [
        (3, [1, 2, 2], [[1, 2, 1], [2, 3, 1]], 3),
        (2, [1, 1], [[1, 2, 1], [1, 2, 2]], 2),
        # The cheapest well is not in the lowest-numbered house.
        (2, [5, 1], [[1, 2, 1]], 2),
    ]
    methods = {
        'Kruskal': minCostToSupplyWater_Kruskal,
        'Min-Heap': minCostToSupplyWater_MinHeap,
    }
    for n, wells, pipes, ans in tests:
        print(f"\nwells= {wells}")
        print(f"pipes = {pipes}")
        print(f"num houses = {n}")
        for method, solver in methods.items():
            cost = solver(n, wells, pipes)
            print(f"Method {method}: min cost = {cost}")
            success = (ans == cost)
            print(f"Pass: {success}")
            if not success:
                print("Failed")
                return


if __name__ == "__main__":
    run_minCostToSupplyWater()


# ======================================================================
# Problem_2.py
# ======================================================================
"""
277. Find the Celebrity
https://leetcode.com/problems/find-the-celebrity/description/

Suppose you are at a party with n people labeled from 0 to n - 1 and among
them, there may exist one celebrity. The definition of a celebrity is that all
the other n - 1 people know the celebrity, but the celebrity does not know any
of them.

Now you want to find out who the celebrity is or verify that there is not one.
You are only allowed to ask questions like: "Hi, A. Do you know B?" to get
information about whether A knows B. You need to find out the celebrity (or
verify there is not one) by asking as few questions as possible (in the
asymptotic sense).

You are given an integer n and a helper function bool knows(a, b) that tells
you whether a knows b. Implement a function int findCelebrity(n). There will be
exactly one celebrity if they are at the party.

Return the celebrity's label if there is a celebrity at the party. If there is
no celebrity, return -1.

Note that the n x n 2D array graph given as input is not directly available to
you, and instead only accessible through the helper function knows.
graph[i][j] == 1 represents person i knows person j, whereas graph[i][j] == 0
represents person i does not know person j.

Example 1:
Input: graph = [[1,1,0],[0,1,0],[1,1,1]]
Output: 1
Explanation: There are three persons labeled with 0, 1 and 2. graph[i][j] = 1
means person i knows person j, otherwise graph[i][j] = 0 means person i does
not know person j. The celebrity is the person labeled as 1 because both 0 and
2 know him but 1 does not know anybody.

Example 2:
Input: graph = [[1,0,1],[1,1,0],[0,1,1]]
Output: -1
Explanation: There is no celebrity.

Constraints:
n == graph.length == graph[i].length
2 <= n <= 100
graph[i][j] is 0 or 1.
graph[i][i] == 1

Solution:
1. Indegrees and outdegrees
indegrees of vertex i = no. of incoming edges
outdegrees of vertex i = no. of outgoing edges

If i is a celebrity, then:
indegrees[i] = n-1 (-1 because we do not count self-loops, i.e. don't count
                    celebrity knows celebrity)
outdegrees[i] = 0
difference = indegrees[i] - outdegrees[i]
           = n-1

If i is not a celebrity, then either indegrees[i] <= n-2 or outdegrees[i] >= 1,
so difference <= n-2 (never equal to n-1).

https://youtu.be/sPOst2hE4_M?t=2257

Time: O(N^2), Space: O(2N) = O(N)

2. Similar to solution 1, but we optimize the space by using a single
indegrees[] array. We add for every incoming edge and we subtract for every
outgoing edge.
Time: O(N^2), Space: O(N)

3. We assume that the celebrity candidate = person 0. Then,

we check if candidate knows person 1.
yes: update the candidate to person 1. Thus, candidate = person 1
no: no change in candidate

we check if candidate knows person 2.
yes: update the candidate to person 2. Thus, candidate = person 2
no: no change in candidate

we check if candidate knows person 3.
yes: update the candidate to person 3. Thus, candidate = person 3
no: no change in candidate

We continue like this until we have run a check on all N persons. At the end,
we have a potential candidate but not a guaranteed candidate yet.

Why not guaranteed? Because we have always been checking if candidate knows
person i, where i is an index after the candidate's index. Let the final
candidate be at index k. All we know is that person k doesn't know persons
k+1, ..., N-1. But we haven't checked two things yet:
a) if candidate doesn't know persons 0, ..., k-1
b) if everyone knows the candidate

Both (a) and (b) must be satisfied for the candidate to be declared a celebrity.
for i in range(n):
    if candidate != i:  # discard self-loops
        if knows(i, candidate) and not knows(candidate, i):
            continue
        else:
            candidate = -1
            break

The advantage of this approach is that it takes O(N) time.

https://youtu.be/sPOst2hE4_M?t=2960

Time: O(N), Space: O(1)
"""


def mprint(matrix):
    """Print ``matrix`` one row per line with tab-separated cells."""
    print('\n'.join(['\t'.join([str(cell) for cell in row]) for row in matrix]))


# On LeetCode the knows API is already defined and has the signature
#     def knows(a: int, b: int) -> bool
# Here the graph is passed explicitly so the solutions can run standalone.
def knows(graph, i, j):
    """Return graph[i][j], i.e. whether person i knows person j."""
    return graph[i][j]


def findCelebrity_1(graph: List[List[int]], n: int) -> int:
    """Find the celebrity by counting in- and out-degrees.

    Time: O(N^2), Space: O(2N)

    Returns:
        The label of the person known by all n - 1 others who knows nobody,
        or -1 if there is no such person (or n == 0).
    """
    if n == 0:
        return -1
    indegrees = [0] * n
    outdegrees = [0] * n
    for i in range(n):
        for j in range(n):
            if i != j and knows(graph, i, j):  # self-loops are not counted
                outdegrees[i] += 1
                indegrees[j] += 1

    for person in range(n):
        if indegrees[person] == n - 1 and outdegrees[person] == 0:
            return person
    return -1


def findCelebrity_2(graph: List[List[int]], n: int) -> int:
    """Find the celebrity using a single net-degree array.

    Each "i knows j" adds 1 to j's score and subtracts 1 from i's score; only
    a celebrity can reach a score of n - 1.

    Time: O(N^2), Space: O(N)

    Returns:
        The celebrity's label, or -1 if there is none (or n == 0).
    """
    if n == 0:
        return -1
    net_degrees = [0] * n
    for i in range(n):
        for j in range(n):
            if i != j and knows(graph, i, j):  # self-loops are not counted
                net_degrees[i] -= 1
                net_degrees[j] += 1

    for person in range(n):
        if net_degrees[person] == n - 1:
            return person
    return -1


def findCelebrity_3(graph: List[List[int]], n: int) -> int:
    """Find the celebrity by candidate elimination followed by verification.

    Time: O(N), Space: O(1)

    Returns:
        The celebrity's label, or -1 if there is none (or n == 0).
    """
    if n == 0:
        return -1

    celeb = 0
    for i in range(1, n):
        if knows(graph, celeb, i):
            # A celebrity knows nobody, so the current candidate is ruled out;
            # assume the next candidate for celebrity is i.
            celeb = i

    # At this point we have a candidate at position k = celeb, who is known
    # not to know anyone after position k. Still unchecked:
    # a) the candidate doesn't know persons 0, ..., k-1
    # b) everyone knows the candidate
    # Both must hold, so reject as soon as either one fails for some person.
    for i in range(n):
        if i == celeb:
            continue
        if not knows(graph, i, celeb) or knows(graph, celeb, i):
            return -1
    return celeb


def run_findCelebrity():
    """Run all three solvers on sample graphs and print results; stop at first failure."""
    tests = [
        ([[1, 1, 0],
          [0, 1, 0],
          [1, 1, 1]], 1),
        ([[1, 1, 1, 0, 0],
          [1, 1, 1, 0, 1],
          [0, 0, 1, 0, 0],
          [0, 0, 1, 1, 1],
          [0, 1, 1, 1, 1]], 2),
        ([[1, 1, 1, 0, 0],
          [1, 1, 1, 0, 1],
          [0, 0, 1, 0, 0],
          [0, 0, 1, 1, 1],
          [0, 1, 0, 1, 1]], -1),
    ]
    methods = {1: findCelebrity_1, 2: findCelebrity_2, 3: findCelebrity_3}
    for graph, ans in tests:
        print(f"\ngraph:")
        mprint(graph)
        for method, solver in methods.items():
            celebrity = solver(graph, len(graph))
            print(f"Method {method}: celebrity = {celebrity}")
            success = (ans == celebrity)
            print(f"Pass: {success}")
            if not success:
                print(f"Failed")
                return


if __name__ == "__main__":
    run_findCelebrity()


# ======================================================================
# Problem_3.py
# ======================================================================
"""
Travelling is Fun
https://github.com/Zhouzhiling/leetcode/blob/master/Mathworks%20OA%202019%20Traveling%20is%20Fun.md

Julia is planning a vacation and has a list of cities she wants to visit. She
doesn't have a map of the area, but she does have some data that will help her
determine whether there is a road connecting all the cities she wants to
visit. The data comes in the form of two arrays. Each of the first array's
elements is an origin city. Each of the second array's is a destination. There
is also an integer value threshold. She can tell that any two cities are
connected if the values at origin and destination share a common divisor
greater than the threshold. Cities are indexed starting at 0.

Each of the pairs, originCities[0] and destinationCities[0] for example,
represents a route she wants to take. For each pair, determine whether there is
a route between cities. The route does not have to be direct. See the
explanation to Sample Case 1 relating to originCity equals 2 or 4 for examples.

For instance, consider an array originCities = [1,2,3] and
destinationCities = [4,5,6]. The threshold value is 2. There are 6 total
cities. To draw the map, first determine the divisors of all cities:

Origin Cities   Divisors   Destination Cities   Divisors
1               1          4                    1,2,4
2               1,2        5                    1,5
3               1,3        6                    1,2,3,6

The threshold is 2, so we can eliminate cities 1 and 2. Their divisors are not
greater than the threshold. This leaves city 3 to check in the origins list. It
has a divisor in common with city 6, and it is greater than the threshold, so
there is a road between them. This is the only pair of connected cities. Now
that we have created a map, we can check her routes.

She wants to go from originCities[0] = 1 to destinationCities[0] = 4 but there
is no road. There is no road for her second route either, from city 2 to 5.
There is only a road that matches her third route at index 2, from city 3 to 6.
A true/false array of her results would be paths = [0,0,1].

Function description
Complete the function findConnection() below. The function must return a
true/false array where each paths[i] contains 1 if the route between
originCities[i] and destinationCities[i] exists, or 0 if it does not.

findConnection() has the following parameter(s):
    n: integer, the number of cities
    g: integer, the threshold value
    originCities[originCities[0], ... originCities[q-1]]: an array of integers
    destinationCities[destinationCities[0], ... destinationCities[q-1]]: an
        array of integers

Constraints
2 <= n <= 2 * 10^5
0 <= g <= n
1 <= q <= min(n*(n-1)/2, 10^5)
1 <= originCities[i], destinationCities[i] <= n, where 0 <= i < q
originCities[i] != destinationCities[i], where 0 <= i < q

Example 1:
Input: n = 6
       threshold = 0
       origin = [1,4,3,6]
       destination = [3,6,2,5]
Output: [1, 1, 1, 1]

Example 2:
Input: n = 6
       threshold = 1
       origin = [1,2,4,6]
       destination = [3,3,3,4]
Output: [0, 1, 1, 1]

Solution:
1. BFS
We connect all node pairs whose GCD is greater than the threshold.
Then for each query, we do a BFS from origin to destination. If we can reach
it, we set 1; if not, we leave it as 0.
https://youtu.be/MaHBeXA3jI0?t=3796
Time: O(V^2 · logV + q·(V + E)) where V - vertices, E - edges and q - number of
queries. V^2 for building the graph, log V for finding GCD, V+E for BFS
Space: O(V^2 + V) (adjacency list. Worst case V^2 if graph is dense. V for queue)

2. DFS
We connect all node pairs whose GCD is greater than the threshold.
Then for each query, we do a DFS from origin to destination. If we can reach
it, we mark the result as 1, otherwise it's 0. The DFS uses an explicit stack
because the search depth can reach V, beyond the interpreter's recursion limit.
Time: O(V^2 · logV + q·(V + E)), Space: O(V^2 + V) (V^2 for adj list + V for
the stack)

3. Disjoint Union
We group nodes with union-find. Two cities share a divisor d > threshold
exactly when both are multiples of d, so for every d in threshold+1..n we union
d with each of its multiples instead of testing every pair. For each query, we
check if origin and destination are in the same group.
Time: about O(V · logV + q) (V/d unions for each divisor d, with near-constant
find cost thanks to path compression)
Space: O(V) (V for parent[] array)
"""


def gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm)."""
    # Iterative form of: gcd(a, b) = b if a == 0 else gcd(b % a, a)
    while a != 0:
        a, b = b % a, a
    return b


def build_graph(n, threshold):
    """Build the adjacency sets of cities 1..n.

    Cities i and j (i != j) are adjacent when gcd(i, j) > threshold.

    Returns:
        A defaultdict(set) mapping each connected city to its neighbours.
    """
    adj_list = defaultdict(set)
    for i in range(1, n + 1):
        # The relation is symmetric, so each unordered pair is tested once.
        for j in range(i + 1, n + 1):
            if gcd(i, j) > threshold:
                adj_list[i].add(j)
                adj_list[j].add(i)
    return adj_list


def findConnection_BFS(n, threshold, origin, destination):
    """Answer each (origin[i], destination[i]) query with a BFS.

    Returns:
        A list with 1 where the destination is reachable from the origin and
        0 otherwise.
    """
    graph = build_graph(n, threshold)
    paths = []
    for source, target in zip(origin, destination):
        visited = [False] * (n + 1)
        visited[source] = True
        queue = deque([source])
        reached = 0
        while queue:
            curr = queue.popleft()
            if curr == target:
                reached = 1
                break
            for nbr in graph[curr]:
                if not visited[nbr]:
                    visited[nbr] = True  # mark on enqueue to avoid duplicates
                    queue.append(nbr)
        paths.append(reached)
    return paths


def findConnection_DFS(n, threshold, origin, destination):
    """Answer each (origin[i], destination[i]) query with a depth-first search.

    The search uses an explicit stack rather than recursion, since a path can
    be as long as the number of cities.

    Returns:
        A list with 1 where the destination is reachable from the origin and
        0 otherwise.
    """
    graph = build_graph(n, threshold)
    paths = []
    for source, target in zip(origin, destination):
        visited = [False] * (n + 1)
        visited[source] = True
        stack = [source]
        reached = 0
        while stack:
            curr = stack.pop()
            if curr == target:
                reached = 1
                break
            for nbr in graph[curr]:
                if not visited[nbr]:
                    visited[nbr] = True
                    stack.append(nbr)
        paths.append(reached)
    return paths


def findConnection_Union(n, threshold, origin, destination):
    """Answer each (origin[i], destination[i]) query with union-find.

    For every divisor d > threshold, d is unioned with all of its multiples
    up to n; this yields the same groups as unioning every pair whose gcd
    exceeds the threshold, without testing every pair.

    Returns:
        A list with 1 where both cities are in the same group, else 0.
    """
    parent = list(range(n + 1))  # local state; nothing is written to globals

    def find(x):
        root = x
        while parent[root] != root:
            root = parent[root]
        # Path compression: point every node on the walked path at the root.
        while parent[x] != root:
            next_x = parent[x]
            parent[x] = root
            x = next_x
        return root

    def union(x, y):
        px, py = find(x), find(y)
        if px != py:
            parent[px] = py

    # A negative threshold behaves like 0: every pair has gcd >= 1 > threshold.
    first_divisor = max(threshold, 0) + 1
    for divisor in range(first_divisor, n + 1):
        for multiple in range(2 * divisor, n + 1, divisor):
            union(divisor, multiple)

    return [1 if find(x) == find(y) else 0 for x, y in zip(origin, destination)]


def run_findConnection():
    """Run all three solvers on sample inputs and print results; stop at first failure."""
    tests = [
        (6, 0, [1, 4, 3, 6], [3, 6, 2, 5], [1, 1, 1, 1]),
        (6, 1, [1, 2, 4, 6], [3, 3, 3, 4], [0, 1, 1, 1]),
    ]
    methods = {
        'BFS': findConnection_BFS,
        'DFS': findConnection_DFS,
        'Disjoint-Union': findConnection_Union,
    }
    for n, threshold, origin, destination, ans in tests:
        print(f"\nno. of cities = {n}")
        print(f"threshold = {threshold}")
        print(f"origin cities = {origin}")
        print(f"destination cities = {destination}")
        for method, solver in methods.items():
            paths = solver(n, threshold, origin, destination)
            print(f"Method {method}: paths = {paths}")
            success = (ans == paths)
            print(f"Pass: {success}")
            if not success:
                print(f"Failed")
                return


if __name__ == "__main__":
    run_findConnection()