-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.js
More file actions
59 lines (53 loc) · 1.98 KB
/
scraper.js
File metadata and controls
59 lines (53 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Dependencies: axios issues the HTTP requests, cheerio parses the returned HTML.
const { default: axios } = require("axios");
const cheerio = require('cheerio');
// Site origin that the search path and flashcard-set paths in this file are appended to.
const mURL = 'https://www.cram.com';
/**
 * Fetch a page over HTTP and hand its body to cheerio for parsing.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<*>} Whatever `cheerio.load` returns for the response body.
 * @throws Propagates any rejection from `axios.get`.
 */
async function getHTML(url) {
    const { data: markup } = await axios.get(url);
    return cheerio.load(markup);
}
/**
 * Run a search for `query` and collect the flashcard-set links on the results page.
 *
 * @param {string} query - Free-text search term. It is percent-encoded before being
 *   placed in the query string so characters such as `&`, `#` or spaces cannot
 *   alter or truncate the request URL.
 * @returns {Promise<string[]>} Site-relative hrefs matching `/flashcards/<something>`,
 *   in document order, excluding the paths listed in `invalids`.
 * @throws Rethrows (after logging) any error raised while fetching or scanning the page.
 */
async function getQueryFlashcardsURL(query) {
    // Links that match the /flashcards/ pattern but are excluded from the results.
    const invalids = ['/flashcards/create', '/flashcards/apps'];
    const url = `${mURL}/search?query=${encodeURIComponent(query)}`;
    try {
        const $ = await getHTML(url);
        const links = [];
        $('a').each((idx, elem) => {
            const link = $(elem).attr('href');
            // Anchors without an href yield undefined, which the regex rejects.
            if (/^\/flashcards\/.+$/.test(link) && !invalids.includes(link)) {
                links.push(link);
            }
        });
        return links;
    } catch (e) {
        console.log("Something went wrong when fetching QUERY webpage!");
        throw e;
    }
}
/**
 * Pick one flashcard-set link at random, download that page and extract its cards.
 *
 * @param {string[]} links - Site-relative set paths (appended to `mURL`).
 * @param {number} [max] - Upper bound on the number of cards returned. When omitted
 *   (undefined or null) every card advertised by the page is returned.
 * @returns {Promise<{from_url: (string|null), number_of_flashcards: number,
 *   flashcards: Array<{question: string, answer: string}>}>}
 *   The scraped cards, or an empty result with `from_url: null` when `links` is empty.
 * @throws Rethrows (after logging) any error raised while fetching or parsing the page.
 */
async function getFlashcards(links, max) {
    if (links.length === 0) {
        return {from_url: null, number_of_flashcards: 0, flashcards: []};
    }
    const url = `${mURL}${links[Math.floor(Math.random() * links.length)]}`;
    const flashcards = [];
    try {
        const $ = await getHTML(url);
        // The listing heading is expected to start with the card count.
        const heading = $('body > div.body > div.content-type1.study_content > div.main-content > div.flashCardsListing > h3').text();
        const inSet = Number.parseInt(heading.trim().split(/\s+/)[0], 10);
        // An unparseable heading means no cards can be located; treat it as zero.
        const available = Number.isNaN(inSet) ? 0 : inSet;
        // Math.min(n, undefined) is NaN, which would silently skip every card.
        const limit = max == null ? available : Math.min(available, max);
        for (let i = 0; i < limit; ++i) {
            // Rows are addressed by 1-based ids: #row1, #row2, ...
            // NOTE(review): the letter filter also drops lines with no ASCII letters
            // (e.g. a purely numeric answer) — confirm this is intended.
            const flashcard = $(`#row${i + 1}`).text().trim().split("\n")
                .filter(elem => /[a-zA-Z]/.test(elem))
                .map(elem => elem.trim());
            flashcards.push({question: flashcard[0], answer: flashcard[1]});
        }
        return {from_url: url, number_of_flashcards: flashcards.length, flashcards: flashcards};
    } catch (e) {
        console.log("Something went wrong when fetching FLASHCARDS webpage!");
        throw e;
    }
}
/**
 * End-to-end lookup: search for `query`, then scrape one of the matching sets.
 *
 * @param {string} query - Search term forwarded to `getQueryFlashcardsURL`.
 * @param {number} max - Card limit forwarded to `getFlashcards`.
 * @returns {Promise<Object>} The result object produced by `getFlashcards`.
 */
async function getFlashcardResponse(query, max) {
    const candidateLinks = await getQueryFlashcardsURL(query);
    return getFlashcards(candidateLinks, max);
}
// Module export: exposes the end-to-end lookup function to callers that require this file.
exports.getFlashcardResponse = getFlashcardResponse;