From ee4265ec1a827bc32c2d2cf320af45c96cde2ce0 Mon Sep 17 00:00:00 2001 From: David Crosby Date: Tue, 20 Aug 2024 09:49:25 -0700 Subject: [PATCH] Add CookbookDepShaker report Summary: This bookworm report identifies recipe-only cookbooks, and then checks that against the include_recipe calls that are within all cookbooks as well as role run lists determining which cookbooks can be safely removed from metadata.rb without introducing new transitive dependency issues. Since Bookworm doesn't yet have definitions support, and this has up to now only been run on our codebase, the report has a warning so that it's known that this is a tool to make identification easier, and not a license to yank out dependencies without thinking. Differential Revision: D51319562 fbshipit-source-id: 81cc8beb5ada219368fdc8464777857eaff10970 --- .../bookworm/reports/CookbookDepShaker.rb | 207 ++++++++++++++++++ .../rules/CookbookPropertyLiterals.rb | 35 +++ .../rules/CookbookPropertyLiterals_spec.rb | 73 ++++++ 3 files changed, 315 insertions(+) create mode 100644 cookbooks/fb_bookworm/files/default/bookworm/reports/CookbookDepShaker.rb create mode 100644 cookbooks/fb_bookworm/files/default/bookworm/rules/CookbookPropertyLiterals.rb create mode 100644 cookbooks/fb_bookworm/files/default/bookworm/spec/rules/CookbookPropertyLiterals_spec.rb diff --git a/cookbooks/fb_bookworm/files/default/bookworm/reports/CookbookDepShaker.rb b/cookbooks/fb_bookworm/files/default/bookworm/reports/CookbookDepShaker.rb new file mode 100644 index 000000000..6ced5bf78 --- /dev/null +++ b/cookbooks/fb_bookworm/files/default/bookworm/reports/CookbookDepShaker.rb @@ -0,0 +1,207 @@ +# Copyright (c) 2023-present, Meta, Inc. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This report determines what can be safely removed from a cookbook's +# metadata.rb file. The benefit to this is that a client is less likely to +# download unnecessary cookbooks as part of the run synchronization, and makes +# dead code identification easier. + +# TODO: need to add/include definitions key to be entirely correct + +description 'Determines cookbooks that are safe to shake out of metadata.rb' +needs_rules %w{ + MetadatarbExists + AttributeExists + LibraryExists + ResourceExists + ProviderExists + ExplicitMetadataDepends + IncludeRecipeLiterals + IncludeRecipeDynamic + RoleRunListRecipes + CookbookPropertyLiterals +} + +def determine_role_referenced_cookbooks + recipes = Set.new + @kb.roles.each do |_, metadata| + metadata['RoleRunListRecipes'].each do |recipe| + recipes << recipe + end + end + Set.new(recipes.map { |x| x.gsub(/::.*/, '') }).each do |cb| + @kb.cookbooks[cb]['role_referenced_cookbook'] = true + end +end + +def determine_cookbook_property_referenced_cookbooks + cookbooks = Set.new + @kb.recipes.each do |_, metadata| + metadata['CookbookPropertyLiterals'].each do |cookbook| + cookbooks << cookbook + end + end + cookbooks.each do |cb| + @kb.cookbooks[cb]['cookbook_property_referenced_cookbook'] = true + end + cookbooks.to_a.sort +end + +def determine_recipe_only_cookbooks + cookbooks = Set.new(@kb.cookbooks.keys) + cookbooks.subtract(Set.new(@kb.attributes.map { |_, c| c['cookbook'] })) + cookbooks.subtract(Set.new(@kb.libraries.map { |_, c| c['cookbook'] })) + cookbooks.subtract(Set.new(@kb.resources.map { |_, c| c['cookbook'] })) + cookbooks.subtract(Set.new(@kb.providers.map { |_, c| c['cookbook'] })) + cookbooks.each do |cb| + @kb.cookbooks[cb]['recipe_only_cookbook'] = true + end + cookbooks.to_a.sort +end + +def determine_cookbooks_using_dependency + @kb.cookbooks.each do |cb, m| + m['dependencies'].each do |dep| + @kb.cookbooks[dep]['cookbooks_using_as_dependency'] ||= Set.new + @kb.cookbooks[dep]['cookbooks_using_as_dependency'] << cb + end + end +end + +def determine_cookbook_dependencies + @kb.cookbooks.each do |cb, m| + m['dependencies'] ||= @kb.metadatarbs["#{cb}::metadata.rb"]['ExplicitMetadataDepends'].flatten.to_set + end +end + +def determine_explicitly_included_cookbooks + @kb.recipes.select do |_, m| + next unless m['IncludeRecipeLiterals'] + + @kb.cookbooks[m['cookbook']]['explicitly_included_cookbooks'] ||= Set.new + @kb.cookbooks[m['cookbook']]['explicitly_included_cookbooks'] += m['IncludeRecipeLiterals'].flatten.map do |x| + x.gsub(/::.*/, '') + end + end +end + +# Cookbooks with dynamic recipe inclusion can't be safely shaken :-( +def determine_cookbooks_with_dynamic_recipe_inclusion + @kb.recipes.select do |_, m| + if m['IncludeRecipeDynamic'] + @kb.cookbooks[m['cookbook']]['dynamic_recipe_inclusion'] = true + end + end +end + +def shakee_debug(shakee, msg) + # It's not always clear why cookbooks aren't showing up in the results + # (usually a result of convoluted dependency chains), so this variable is for + # debugging until there's granular logging facility support in Bookworm + debug = false + puts "SHAKEE #{shakee}: #{msg}" if debug +end + +def to_an_array + investigate = [] + + determine_role_referenced_cookbooks + determine_cookbook_dependencies + determine_cookbooks_using_dependency + determine_explicitly_included_cookbooks + determine_cookbooks_with_dynamic_recipe_inclusion + + # Save the values + crpc = determine_cookbook_property_referenced_cookbooks + roc = Set.new(determine_recipe_only_cookbooks) + + @kb.cookbooks.sort_by { |x| x[0] }.each do |shakee, smd| # smd - Shakee cookbook's bookworm metadata + # No dependencies to shake, move along + if smd['dependencies'].empty? + shakee_debug shakee, 'does not have dependencies' + next + end + + # If a cookbook has a dynamic include_recipe call, we can't safely assume + # what it calls, so we'll need to skip :-( + if smd['dynamic_recipe_inclusion'] + shakee_debug(shakee, 'is a cookbook with dynamic recipe inclusion') if smd['dynamic_recipe_inclusion'] + next + end + + # Copy dependencies of cookbook + shakee_deps = smd['dependencies'].dup + + # Remove non-recipe-only cookbooks + shakee_deps &= roc + next if shakee_deps.empty? + + # Remove cookbooks referenced by another cookbook through the + # `cookbook` property seen in cookbook_file/template/remote_directory + # resources + shakee_deps.subtract(crpc) + next if shakee_deps.empty? + + # Remove cookbooks that have an explicit include_recipe reference + shakee_debug shakee, "explicitly included cookbooks:\n#{smd['explicitly_included_cookbooks'].join("\n")}" + + shakee_deps.subtract(smd['explicitly_included_cookbooks']) + next if shakee_deps.empty? + + shakee_debug(shakee, 'is a role referenced cookbook') if smd['role_referenced_cookbook'] + shakee_debug(shakee, 'is also a recipe-only cookbook') if smd['recipe_only_cookbook'] + + # Does the shakee have the dependencies of the dependencies? + # This step is necessary to ensure that the cookbook load order isn't + # disrupted by transitive dependencies getting juggled around + # See https://github.com/chef/chef/blob/4ce99c419028c169aeb3e68ec954b79f20e654c5/lib/chef/run_context/cookbook_compiler.rb#L112-L127 + # and https://github.com/chef/chef/blob/4ce99c419028c169aeb3e68ec954b79f20e654c5/lib/chef/run_context/cookbook_compiler.rb#L395-L408 + # and https://github.com/chef/chef/blob/4ce99c419028c169aeb3e68ec954b79f20e654c5/lib/chef/run_context/cookbook_compiler.rb#L438-L443 + shakee_deps.each do |dep| + if @kb.cookbooks[dep]['dependencies'].subset? smd['dependencies'] + investigate << [shakee, dep] + shakee_debug shakee, "can likely remove #{dep}" + else + shakee_debug shakee, "DEP #{dep} also wants #{@kb.cookbooks[dep]['dependencies'] - smd['dependencies']}" + # If this cookbook isn't referenced by a role, we can easily assume that + # any cookbook that uses the shakee as a dependency has already loaded + # the cookbooks as well. + unless smd['role_referenced_cookbook'] + # This isn't a role-referenced cookbook, so we can try adding "parents" deps to the shakee dep list + if smd['cookbooks_using_as_dependency']&.all? do |parent_cb| + @kb.cookbooks[dep]['dependencies'].subset?((smd['dependencies']+@kb.cookbooks[parent_cb]['dependencies'])) + end + investigate << [shakee, dep] + shakee_debug shakee, "can likely remove #{dep} with parent cookbook dependencies in place" + end + end + end + end + end + investigate +end + +def to_a_string + buffer = '' + buffer << "WARNING: CookbookDepShaker results should *always* be reviewed for correctness\n" + to_an_array.each do |shakee, dep| + buffer << "Cookbook #{shakee} - can likely remove #{dep}\n" + end + buffer +end + +def output + to_a_string +end diff --git a/cookbooks/fb_bookworm/files/default/bookworm/rules/CookbookPropertyLiterals.rb b/cookbooks/fb_bookworm/files/default/bookworm/rules/CookbookPropertyLiterals.rb new file mode 100644 index 000000000..4b803e647 --- /dev/null +++ b/cookbooks/fb_bookworm/files/default/bookworm/rules/CookbookPropertyLiterals.rb @@ -0,0 +1,35 @@ +# Copyright (c) 2022-present, Meta, Inc. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +description 'Extracts cookbooks referenced by the cookbook property common with file resources' +keys ['recipe'] + +def_node_search :resource_blocks, '`(block (send nil? {:cookbook_file :template :remote_directory} _) (args) $...)' + +def_node_search :cookbook_literal, '`(send nil? :cookbook (:str $_))' + +def to_a + arr = [] + resource_blocks(@metadata['ast']).each do |res| + # We've got a resource block, check for cookbook property + res.each do |r| + cookbook_literal(r).each do |prop| + arr << prop + end + end + end + return [] if arr.empty? + arr.uniq! + arr.sort! +end diff --git a/cookbooks/fb_bookworm/files/default/bookworm/spec/rules/CookbookPropertyLiterals_spec.rb b/cookbooks/fb_bookworm/files/default/bookworm/spec/rules/CookbookPropertyLiterals_spec.rb new file mode 100644 index 000000000..b426ae0d3 --- /dev/null +++ b/cookbooks/fb_bookworm/files/default/bookworm/spec/rules/CookbookPropertyLiterals_spec.rb @@ -0,0 +1,73 @@ +# Copyright (c) 2022-present, Meta, Inc. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +require_relative './helper' + +describe Bookworm::InferRules::CookbookPropertyLiterals do + it 'returns empty array when no cookbook property' do + ast = generate_ast(<<~RUBY) + cookbook_file 'just_a_plain_old_resource' + RUBY + rule = described_class.new({ 'ast' => ast }) + expect(rule.output).to eq([]) + end + it 'returns cookbook that file resource uses (no begin block)' do + ast = generate_ast(<<~RUBY) + cookbook_file 'just_a_plain_old_resource' do + cookbook 'foo' + end + RUBY + rule = described_class.new({ + 'cookbook' => 'fake_cookbook', + 'ast' => ast, + }) + expect(rule.output).to eq(['foo']) + end + it 'returns cookbook that file resource uses (with begin block)' do + ast = generate_ast(<<~RUBY) + cookbook_file 'just_a_plain_old_resource' do + ignore 'this' + cookbook 'foo' + also 'ignore' + end + RUBY + rule = described_class.new({ + 'cookbook' => 'fake_cookbook', + 'ast' => ast, + }) + expect(rule.output).to eq(['foo']) + end + it 'returns multiple cookbooks used by known resource' do + ast = generate_ast(<<~RUBY) + file 'just_a_plain_old_resource' + + cookbook_file 'from_foo' do + cookbook 'foo' + end + + template 'from_bar' do + cookbook 'bar' + end + + remote_directory 'from_baz' do + cookbook 'baz' + end + RUBY + rule = described_class.new({ + 'cookbook' => 'fake_cookbook', + 'ast' => ast, + }) + expect(rule.output).to eq(['bar', 'baz', 'foo']) # results are sorted + end +end