verify.js 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. 'use strict'
  2. const util = require('util')
  3. const pMap = require('p-map')
  4. const contentPath = require('./content/path')
  5. const fixOwner = require('./util/fix-owner')
  6. const fs = require('fs')
  7. const fsm = require('fs-minipass')
  8. const glob = util.promisify(require('glob'))
  9. const index = require('./entry-index')
  10. const path = require('path')
  11. const rimraf = util.promisify(require('rimraf'))
  12. const ssri = require('ssri')
  13. const hasOwnProperty = (obj, key) =>
  14. Object.prototype.hasOwnProperty.call(obj, key)
  15. const stat = util.promisify(fs.stat)
  16. const truncate = util.promisify(fs.truncate)
  17. const writeFile = util.promisify(fs.writeFile)
  18. const readFile = util.promisify(fs.readFile)
  19. const verifyOpts = (opts) => ({
  20. concurrency: 20,
  21. log: { silly () {} },
  22. ...opts
  23. })
  24. module.exports = verify
  25. function verify (cache, opts) {
  26. opts = verifyOpts(opts)
  27. opts.log.silly('verify', 'verifying cache at', cache)
  28. const steps = [
  29. markStartTime,
  30. fixPerms,
  31. garbageCollect,
  32. rebuildIndex,
  33. cleanTmp,
  34. writeVerifile,
  35. markEndTime
  36. ]
  37. return steps
  38. .reduce((promise, step, i) => {
  39. const label = step.name
  40. const start = new Date()
  41. return promise.then((stats) => {
  42. return step(cache, opts).then((s) => {
  43. s &&
  44. Object.keys(s).forEach((k) => {
  45. stats[k] = s[k]
  46. })
  47. const end = new Date()
  48. if (!stats.runTime) {
  49. stats.runTime = {}
  50. }
  51. stats.runTime[label] = end - start
  52. return Promise.resolve(stats)
  53. })
  54. })
  55. }, Promise.resolve({}))
  56. .then((stats) => {
  57. stats.runTime.total = stats.endTime - stats.startTime
  58. opts.log.silly(
  59. 'verify',
  60. 'verification finished for',
  61. cache,
  62. 'in',
  63. `${stats.runTime.total}ms`
  64. )
  65. return stats
  66. })
  67. }
  68. function markStartTime (cache, opts) {
  69. return Promise.resolve({ startTime: new Date() })
  70. }
  71. function markEndTime (cache, opts) {
  72. return Promise.resolve({ endTime: new Date() })
  73. }
  74. function fixPerms (cache, opts) {
  75. opts.log.silly('verify', 'fixing cache permissions')
  76. return fixOwner
  77. .mkdirfix(cache, cache)
  78. .then(() => {
  79. // TODO - fix file permissions too
  80. return fixOwner.chownr(cache, cache)
  81. })
  82. .then(() => null)
  83. }
  84. // Implements a naive mark-and-sweep tracing garbage collector.
  85. //
  86. // The algorithm is basically as follows:
  87. // 1. Read (and filter) all index entries ("pointers")
  88. // 2. Mark each integrity value as "live"
  89. // 3. Read entire filesystem tree in `content-vX/` dir
  90. // 4. If content is live, verify its checksum and delete it if it fails
  91. // 5. If content is not marked as live, rimraf it.
  92. //
  93. function garbageCollect (cache, opts) {
  94. opts.log.silly('verify', 'garbage collecting content')
  95. const indexStream = index.lsStream(cache)
  96. const liveContent = new Set()
  97. indexStream.on('data', (entry) => {
  98. if (opts.filter && !opts.filter(entry)) {
  99. return
  100. }
  101. liveContent.add(entry.integrity.toString())
  102. })
  103. return new Promise((resolve, reject) => {
  104. indexStream.on('end', resolve).on('error', reject)
  105. }).then(() => {
  106. const contentDir = contentPath.contentDir(cache)
  107. return glob(path.join(contentDir, '**'), {
  108. follow: false,
  109. nodir: true,
  110. nosort: true
  111. }).then((files) => {
  112. return Promise.resolve({
  113. verifiedContent: 0,
  114. reclaimedCount: 0,
  115. reclaimedSize: 0,
  116. badContentCount: 0,
  117. keptSize: 0
  118. }).then((stats) =>
  119. pMap(
  120. files,
  121. (f) => {
  122. const split = f.split(/[/\\]/)
  123. const digest = split.slice(split.length - 3).join('')
  124. const algo = split[split.length - 4]
  125. const integrity = ssri.fromHex(digest, algo)
  126. if (liveContent.has(integrity.toString())) {
  127. return verifyContent(f, integrity).then((info) => {
  128. if (!info.valid) {
  129. stats.reclaimedCount++
  130. stats.badContentCount++
  131. stats.reclaimedSize += info.size
  132. } else {
  133. stats.verifiedContent++
  134. stats.keptSize += info.size
  135. }
  136. return stats
  137. })
  138. } else {
  139. // No entries refer to this content. We can delete.
  140. stats.reclaimedCount++
  141. return stat(f).then((s) => {
  142. return rimraf(f).then(() => {
  143. stats.reclaimedSize += s.size
  144. return stats
  145. })
  146. })
  147. }
  148. },
  149. { concurrency: opts.concurrency }
  150. ).then(() => stats)
  151. )
  152. })
  153. })
  154. }
  155. function verifyContent (filepath, sri) {
  156. return stat(filepath)
  157. .then((s) => {
  158. const contentInfo = {
  159. size: s.size,
  160. valid: true
  161. }
  162. return ssri
  163. .checkStream(new fsm.ReadStream(filepath), sri)
  164. .catch((err) => {
  165. if (err.code !== 'EINTEGRITY') {
  166. throw err
  167. }
  168. return rimraf(filepath).then(() => {
  169. contentInfo.valid = false
  170. })
  171. })
  172. .then(() => contentInfo)
  173. })
  174. .catch((err) => {
  175. if (err.code === 'ENOENT') {
  176. return { size: 0, valid: false }
  177. }
  178. throw err
  179. })
  180. }
  181. function rebuildIndex (cache, opts) {
  182. opts.log.silly('verify', 'rebuilding index')
  183. return index.ls(cache).then((entries) => {
  184. const stats = {
  185. missingContent: 0,
  186. rejectedEntries: 0,
  187. totalEntries: 0
  188. }
  189. const buckets = {}
  190. for (const k in entries) {
  191. /* istanbul ignore else */
  192. if (hasOwnProperty(entries, k)) {
  193. const hashed = index.hashKey(k)
  194. const entry = entries[k]
  195. const excluded = opts.filter && !opts.filter(entry)
  196. excluded && stats.rejectedEntries++
  197. if (buckets[hashed] && !excluded) {
  198. buckets[hashed].push(entry)
  199. } else if (buckets[hashed] && excluded) {
  200. // skip
  201. } else if (excluded) {
  202. buckets[hashed] = []
  203. buckets[hashed]._path = index.bucketPath(cache, k)
  204. } else {
  205. buckets[hashed] = [entry]
  206. buckets[hashed]._path = index.bucketPath(cache, k)
  207. }
  208. }
  209. }
  210. return pMap(
  211. Object.keys(buckets),
  212. (key) => {
  213. return rebuildBucket(cache, buckets[key], stats, opts)
  214. },
  215. { concurrency: opts.concurrency }
  216. ).then(() => stats)
  217. })
  218. }
  219. function rebuildBucket (cache, bucket, stats, opts) {
  220. return truncate(bucket._path).then(() => {
  221. // This needs to be serialized because cacache explicitly
  222. // lets very racy bucket conflicts clobber each other.
  223. return bucket.reduce((promise, entry) => {
  224. return promise.then(() => {
  225. const content = contentPath(cache, entry.integrity)
  226. return stat(content)
  227. .then(() => {
  228. return index
  229. .insert(cache, entry.key, entry.integrity, {
  230. metadata: entry.metadata,
  231. size: entry.size
  232. })
  233. .then(() => {
  234. stats.totalEntries++
  235. })
  236. })
  237. .catch((err) => {
  238. if (err.code === 'ENOENT') {
  239. stats.rejectedEntries++
  240. stats.missingContent++
  241. return
  242. }
  243. throw err
  244. })
  245. })
  246. }, Promise.resolve())
  247. })
  248. }
  249. function cleanTmp (cache, opts) {
  250. opts.log.silly('verify', 'cleaning tmp directory')
  251. return rimraf(path.join(cache, 'tmp'))
  252. }
  253. function writeVerifile (cache, opts) {
  254. const verifile = path.join(cache, '_lastverified')
  255. opts.log.silly('verify', 'writing verifile to ' + verifile)
  256. try {
  257. return writeFile(verifile, '' + +new Date())
  258. } finally {
  259. fixOwner.chownr.sync(cache, verifile)
  260. }
  261. }
  262. module.exports.lastRun = lastRun
  263. function lastRun (cache) {
  264. return readFile(path.join(cache, '_lastverified'), 'utf8').then(
  265. (data) => new Date(+data)
  266. )
  267. }