forked from ex01tus/leetcode-grind
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebCrawler.java
More file actions
41 lines (32 loc) · 1.02 KB
/
WebCrawler.java
File metadata and controls
41 lines (32 loc) · 1.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package graph;
import java.util.*;
/**
 * Description: https://leetcode.com/problems/web-crawler
 * Difficulty: Medium
 * Time complexity: O(n)
 * Space complexity: O(n)
 *
 * <p>Breadth-first crawl that starts at one URL and follows only links whose
 * host is identical to the host of the start URL.
 */
public class WebCrawler {

    /**
     * Collects every URL reachable from {@code startUrl} through links that stay on
     * the same host as {@code startUrl}.
     *
     * @param startUrl   URL to start from; must look like {@code http://host[/path]}
     * @param htmlParser source of outgoing links for a given URL
     * @return all discovered same-host URLs, including {@code startUrl}, in no particular order
     * @throws ArrayIndexOutOfBoundsException if {@code startUrl} has no host part after {@code "//"}
     */
    public List<String> crawl(String startUrl, HtmlParser htmlParser) {
        String hostname = getHostname(startUrl);

        Queue<String> planned = new ArrayDeque<>();
        planned.offer(startUrl);

        Set<String> visited = new HashSet<>();
        visited.add(startUrl);

        while (!planned.isEmpty()) {
            String current = planned.poll();
            for (String neighbor : htmlParser.getUrls(current)) {
                // Set.add returns false for already-seen URLs, so each URL is enqueued at most once.
                if (isSameHost(neighbor, hostname) && visited.add(neighbor)) {
                    planned.offer(neighbor);
                }
            }
        }

        return new ArrayList<>(visited);
    }

    /**
     * Returns the {@code "http://" + host} prefix of the given URL.
     * Splitting {@code "http://host/path"} on {@code '/'} yields {@code ["http:", "", "host", ...]},
     * so the host is the element at index 2.
     */
    private static String getHostname(String startUrl) {
        return "http://" + startUrl.split("/")[2];
    }

    /**
     * Tells whether {@code url} lives on exactly the host described by {@code hostname}.
     *
     * <p>A plain prefix test is not enough: {@code http://news.yahoo.com.us/x} starts with
     * {@code http://news.yahoo.com} yet belongs to a different host. The prefix must therefore
     * be followed by the end of the string or by a character that terminates the host part.
     */
    private static boolean isSameHost(String url, String hostname) {
        if (!url.startsWith(hostname)) {
            return false;
        }
        if (url.length() == hostname.length()) {
            return true;
        }
        char next = url.charAt(hostname.length());
        return next == '/' || next == '?' || next == '#';
    }

    /**
     * Supplies the outgoing links of a page. Declared public so that callers of
     * {@link #crawl(String, HtmlParser)} outside this class can provide an implementation.
     */
    public interface HtmlParser {

        /**
         * @param url page whose links are requested
         * @return URLs linked from {@code url}
         */
        List<String> getUrls(String url);
    }
}