forked from electron/apps
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreadmes.js
More file actions
131 lines (117 loc) · 3.58 KB
/
Copy pathreadmes.js
File metadata and controls
131 lines (117 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Upper bound on jobs the limiter runs at the same time. Taken from the
// MAX_CONCURRENCY environment variable; falls back to 4 when that variable is
// unset, not numeric, or 0.
const MAX_CONCURRENCY = Number(process.env.MAX_CONCURRENCY) || 4 // simultaneous open web requests
// How long a cached README entry counts as fresh. The README_CACHE_TTL
// environment variable (default '4 hours') is parsed by human-interval; the
// result is later added to a Date#getTime() value, so it is presumably a
// number of milliseconds.
const README_CACHE_TTL = require('human-interval')(
process.env.README_CACHE_TTL || '4 hours'
)
const fs = require('fs')
const path = require('path')
const Bottleneck = require('bottleneck')
const github = require('../lib/github')
const cheerio = require('cheerio')
const parseGitUrl = require('github-url-to-object')
// JSON file holding README data; read here as the previous run's cache and
// written again at the end of the script.
const outputFile = path.join(__dirname, '../meta/readmes.json')
// README data from the previous run, looked up by app slug to decide which
// apps need refreshing.
// NOTE(review): require() throws if the file is missing - confirm it always exists.
const oldReadmeData = require(outputFile)
// Results gathered during this run, keyed by app slug.
const output = {}
// Job scheduler that caps how many requests are in flight at once.
const limiter = new Bottleneck({
maxConcurrent: MAX_CONCURRENCY,
})
// NOTE(review): presumably the full app list; only its length is used in this file.
const apps = require('../lib/raw-app-list')()
// Apps that have a GitHub repository; each entry is read via `slug` and
// `repository` further down.
const appsWithRepos = require('../lib/apps-with-github-repos')
// Select the apps whose cached README entry is absent or older than the
// configured cache TTL, then report how many were selected.
const appsToUpdate = appsWithRepos.filter(({ slug }) => {
  const cached = oldReadmeData[slug]
  if (cached) {
    // A missing readmeFetchedAt turns into new Date(null), i.e. the epoch,
    // which makes the entry count as stale.
    const fetchedAtMs = new Date(cached.readmeFetchedAt || null).getTime()
    return fetchedAtMs + README_CACHE_TTL < Date.now()
  }
  // Never fetched before.
  return true
})

const repoCount = appsWithRepos.length
console.log(`${repoCount} of ${apps.length} apps have a GitHub repo.`)
console.log(
  `${appsToUpdate.length} of those ${repoCount} have missing or outdated README data.`
)
// For every stale app: look up the repository to learn its default branch,
// then fetch the README and store the outcome in `output` under the app slug.
// Both requests go through the limiter so concurrency stays bounded.
appsToUpdate.forEach((app) => {
  // Stores the "no README available" entry for this app.
  const recordMissingReadme = () => {
    console.error(`${app.slug}: no README found`)
    output[app.slug] = {
      readmeOriginal: null,
      readmeFetchedAt: new Date(),
    }
  }

  // A 404 is an expected outcome (repository or README does not exist);
  // anything else is logged in full.
  const reportUnexpectedError = (err) => {
    if (err.status !== 404) {
      console.error(`${app.slug}: Non 404 error`)
      console.error(err)
    }
  }

  limiter
    .schedule(getReadme, app)
    .then((repository) => repository.data.default_branch)
    .catch((err) => {
      reportUnexpectedError(err)
      return undefined
    })
    .then((defaultBranch) => {
      if (!defaultBranch) {
        // getReadme() only requests the README when it receives a truthy
        // branch; calling it without one would repeat the repository lookup
        // and its response would be mistaken for README content. Record the
        // failure instead of issuing that second request.
        recordMissingReadme()
        return undefined
      }

      // Returned so the outer chain settles only after the README request.
      return limiter
        .schedule(getReadme, app, defaultBranch)
        .then((release) => {
          console.log(`${app.slug}: got latest README`)
          output[app.slug] = {
            readmeCleaned: cleanReadme(release.data, defaultBranch, app),
            readmeOriginal: release.data,
            readmeFetchedAt: new Date(),
          }
        })
        .catch((err) => {
          recordMissingReadme()
          reportUnexpectedError(err)
        })
    })
})
// When the limiter reports it has no work left, persist the README data and
// end the process.
limiter.on('idle', () => {
  // NOTE(review): the one-second delay presumably gives follow-up jobs that
  // are scheduled from promise callbacks time to enqueue - confirm.
  setTimeout(() => {
    // Only stale apps are refetched, so `output` holds just this run's
    // results. Merge them over the previously cached entries; writing
    // `output` alone would drop every still-fresh entry from the file.
    const merged = Object.assign({}, oldReadmeData, output)
    fs.writeFileSync(outputFile, JSON.stringify(merged, null, 2))
    console.log(`Done fetching README files.\nWrote ${outputFile}`)
    process.exit()
  }, 1000)
})
/**
 * Issue one of two GitHub requests for an app's repository.
 *
 * With a truthy `defaultBranch` the README endpoint is called; otherwise the
 * repository endpoint is called (its response is where callers read the
 * default branch from). Both calls receive the same owner/repo options and an
 * Accept header asking for the v3 HTML media type.
 *
 * @param {{repository: string}} app - app whose `repository` URL is parsed
 *   with github-url-to-object.
 * @param {string} [defaultBranch] - only its truthiness is used here, to pick
 *   the endpoint.
 * @returns {*} whatever the selected `github.repos` method returns.
 */
function getReadme(app, defaultBranch) {
  const parsed = parseGitUrl(app.repository)
  const request = {
    owner: parsed.user,
    repo: parsed.repo,
    headers: {
      Accept: 'application/vnd.github.v3.html',
    },
  }
  const endpoint = defaultBranch ? 'getReadme' : 'get'
  return github.repos[endpoint](request)
}
/**
 * Decide whether an attribute value is a URL that has to be resolved against
 * the repository.
 *
 * Missing or empty values are not, and neither are URLs that carry their own
 * scheme (http:, https:, data:, mailto:, ...) or are protocol-relative
 * (//host/path).
 *
 * @param {string|undefined} url - raw `src` / `href` attribute value.
 * @returns {boolean} true when the value should be rewritten.
 */
function isRelativeUrl(url) {
  if (typeof url !== 'string' || url === '') return false
  return !/^(?:[a-z][a-z0-9+.-]*:|\/\/)/i.test(url)
}

/**
 * Drop leading "./" and "/" segments so the path can be appended to a base
 * URL without producing "//" or "/./".
 *
 * @param {string} url - relative URL taken from the README.
 * @returns {string} the URL without leading "./" or "/".
 */
function toRepoPath(url) {
  return url.replace(/^(?:\.?\/)+/, '')
}

/**
 * Rewrite repository-relative image and link URLs in README HTML so they
 * keep working outside of GitHub.
 *
 * Images are pointed at raw.githubusercontent.com (and get an empty
 * `crossorigin` attribute); links are pointed at the repository's blob view.
 * Elements without the attribute, and URLs that are already absolute, are
 * left untouched.
 *
 * @param {string} readme - README HTML handed to cheerio.load.
 * @param {string} defaultBranch - branch name used in the rewritten URLs.
 * @param {{slug: string, repository: string}} app - `slug` is used in log
 *   messages, `repository` as the base URL.
 * @returns {string} HTML of the document body after rewriting.
 */
function cleanReadme(readme, defaultBranch, app) {
  const $ = cheerio.load(readme)
  const rawBase = `${app.repository.replace(
    'github.com',
    'raw.githubusercontent.com'
  )}/${defaultBranch}`
  const blobBase = `${app.repository}/blob/${defaultBranch}`

  const $relativeImages = $('img').filter((i, img) =>
    isRelativeUrl($(img).attr('src'))
  )
  if ($relativeImages.length) {
    console.log(
      `${app.slug}: updating ${$relativeImages.length} relative image URLs`
    )
    $relativeImages.each((i, img) => {
      $(img).attr({
        src: `${rawBase}/${toRepoPath($(img).attr('src'))}`,
        crossorigin: '',
      })
    })
  }

  const $relativeLinks = $('a').filter((i, link) =>
    isRelativeUrl($(link).attr('href'))
  )
  if ($relativeLinks.length) {
    console.log(`${app.slug}: updating ${$relativeLinks.length} relative links`)
    $relativeLinks.each((i, link) => {
      $(link).attr('href', `${blobBase}/${toRepoPath($(link).attr('href'))}`)
    })
  }

  return $('body').html()
}