-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.js
112 lines (102 loc) · 3.52 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
// GitHub profile URL to scrape, passed as the first command-line argument,
// e.g. https://github.com/varunon9
var url = process.argv[2];
if (!url) {
  // Without this guard the request would be sent to "undefined?tab=repositories".
  console.error('Usage: node index.js <github-profile-url>');
  process.exitCode = 1;
} else {
  // The repositories tab is requested so the repository list is part of the page.
  url += '?tab=repositories';
  // Fetch the page and hand the parsed document to the scraper.
  request(url, function(error, response, html) {
    if (error) {
      console.error("Error occured:", error);
      return;
    }
    // An error page (e.g. 404 for an unknown user) must not be scraped:
    // it would produce an empty profile that still gets written to disk.
    if (response.statusCode !== 200) {
      console.error("Error occured:", 'unexpected HTTP status ' + response.statusCode + ' for ' + url);
      return;
    }
    var $ = cheerio.load(html);
    // The scraped profile is written out in JSON form.
    var outputJson = scrapGithub($);
    writeOutputFile(outputJson);
  });
}
/**
 * Extracts profile data from a parsed GitHub profile page.
 *
 * @param {Function} $ - cheerio-style query function for the loaded page;
 *   it is called as $(selector), $(selector, context) and $(element).
 * @returns {Object} object holding avatar, name, username and description,
 *   whichever of organization/location/email/website and
 *   repositories_count/stars/followers/following were present on the page,
 *   and a `repositories` array of { name, url, description } entries.
 */
var scrapGithub = function($) {
  // Removes line breaks plus the whitespace at the start of every line.
  var dropBreaksAndIndent = function(text) {
    return text.replace(/(\r\n|\n|\r|^\s+)/gm, '');
  };
  // Removes every whitespace character, line breaks included.
  var dropAllWhitespace = function(text) {
    return text.replace(/(\r\n|\n|\r|\s)/gm, '');
  };
  // Text of the link(s) found inside a list item.
  var linkText = function(item) {
    return $('a', item).text();
  };

  var profile = {
    avatar: $('.vcard-avatar img').attr('src'),
    name: $('.vcard-fullname', '.vcard-names').text(),
    username: $('.vcard-username', '.vcard-names').text(),
    description: $('.user-profile-bio').text()
  };

  // Sidebar details are recognised purely by their position in the list.
  // NOTE(review): if a profile omits one of these entries the remaining ones
  // presumably shift up and land under the wrong key - confirm against the
  // live markup.
  var detailFields = [
    { key: 'organization', read: function(item) { return $(item).text(); } },
    { key: 'location', read: function(item) { return dropBreaksAndIndent($(item).text()); } },
    { key: 'email', read: linkText },
    { key: 'website', read: linkText }
  ];
  $('.vcard-details li').each(function(position, item) {
    var field = detailFields[position];
    if (field) {
      profile[field.key] = field.read(item);
    }
  });

  // Counters shown in the profile navigation; the link at index 0 is skipped.
  var counterKeys = [null, 'repositories_count', 'stars', 'followers', 'following'];
  $('.user-profile-nav a').each(function(position, link) {
    var key = counterKeys[position];
    if (key) {
      profile[key] = dropAllWhitespace($('span', link).text());
    }
  });

  // One entry per item of the repository list.
  profile.repositories = [];
  $('.js-repo-list li', '#user-repositories-list').each(function(position, item) {
    var anchor = $('a', item);
    profile.repositories.push({
      name: dropBreaksAndIndent(anchor.text()),
      url: 'https://github.com' + anchor.attr('href'),
      description: dropBreaksAndIndent($('p.text-gray', item).text())
    });
  });

  return profile;
};
/**
 * Serializes the scraped data and saves it as user.json (relative path, so it
 * lands in the current working directory), echoing the JSON to stdout.
 *
 * @param {Object} outputJson - data to serialize with JSON.stringify.
 */
var writeOutputFile = function(outputJson) {
  var data = JSON.stringify(outputJson);
  // Echo the JSON exactly once, before writing, so the result is still
  // visible when the write fails.
  console.log(data);
  fs.writeFile('user.json', data, function(err) {
    if (err) {
      // Report failures on stderr so they do not mix with the JSON on stdout.
      console.error(err);
    }
  });
};