Project Stage 2: Clone-Pruning Analysis Pass
In this stage, we will implement a custom GCC pass that analyzes cloned functions generated through Function Multi-Versioning (FMV), determines whether they are substantially the same, and emits pruning recommendations. This is part of a broader effort toward Automatic Function Multi-Versioning (AFMV).
First ,we copy and extract the SPO600 function multiversioning test archive:
mkdir ~/spo600-tests
cp /public/spo600-test-clone.tgz ~/spo600-tests/
cd ~/spo600-tests
tar -xvzf spo600-test-clone.tgz
Step1:Print detailed information about the GIMPLE statement number, statement type name (such as GIMPLE_ASSIGN), and operands.
#include "config.h" #include "system.h" #include "coretypes.h" #include "tree.h" #include "tree-pass.h" #include "cgraph.h" #include "function.h" #include "basic-block.h" #include "gimple.h" #include "gimple-iterator.h" #include "cfg.h" #include "context.h" #include "dumpfile.h" #include "tree-pretty-print.h" #include "gimple-ssa.h" namespace { const pass_data hxu_pass_data = { GIMPLE_PASS, // Type of pass "hxu132", // Pass name, used with -fdump-tree-hxu132 OPTGROUP_NONE, TV_NONE, PROP_cfg, 0, 0, 0, 0 }; class hxu_pass : public gimple_opt_pass { public: hxu_pass(gcc::context *ctxt) : gimple_opt_pass(hxu_pass_data, ctxt) {} bool gate(function *) override { return true; // Always run the pass } unsigned int execute(function *fun) override { int bb_cnt = 0, gimple_stmt_cnt = 0; basic_block bb; if (dump_file) { fprintf(dump_file, "===============================================================\n"); fprintf(dump_file, "Function: %s\n", function_name(fun)); fprintf(dump_file, "===============================================================\n"); } FOR_EACH_BB_FN(bb, fun) { bb_cnt++; int bb_stmt_cnt = 0; for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { gimple *stmt = gsi_stmt(gsi); bb_stmt_cnt++; if (dump_file) { fprintf(dump_file, "\n GIMPLE code: %d\n", gimple_code(stmt)); fprintf(dump_file, " GIMPLE code name: %s\n", gimple_code_name[gimple_code(stmt)]); for (unsigned int i = 0; i < gimple_num_ops(stmt); ++i) { tree op = gimple_op(stmt, i); if (op) { fprintf(dump_file, " Operand[%u]: ", i); print_generic_expr(dump_file, op, TDF_NONE); fprintf(dump_file, "\n"); } } fprintf(dump_file, "---------------------------------------------------------------\n"); } } gimple_stmt_cnt += bb_stmt_cnt; } if (dump_file) { fprintf(dump_file, "Total Basic Blocks: %d\n", bb_cnt); fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_cnt); fprintf(dump_file, "\n\n"); } return 0; } }; } // anonymous namespace gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) { return new hxu_pass(ctxt); }
cd ~/gcc-build-001
make -j$(nproc)
./xgcc -B. -O0 -fdump-tree-hxu132 -c hello.c
cat hello.c.270t.hxu132
Step 2:Find clone function
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "cfg.h"
#include "context.h"
#include "dumpfile.h"
#include "tree-pretty-print.h"
#include "gimple-ssa.h"
namespace {
const pass_data hxu_pass_data = {
GIMPLE_PASS,
"hxu132", // Use -fdump-tree-hxu132 to dump
OPTGROUP_NONE,
TV_NONE,
PROP_cfg,
0, 0, 0, 0
};
class hxu_pass : public gimple_opt_pass {
public:
hxu_pass(gcc::context *ctxt) : gimple_opt_pass(hxu_pass_data, ctxt) {}
bool gate(function *) override {
return true;
}
unsigned int execute(function *fun) override {
int bb_cnt = 0, gimple_stmt_cnt = 0;
basic_block bb;
if (dump_file) {
const char* func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl));
fprintf(dump_file, "===============================================================\n");
fprintf(dump_file, "Function: %s\n", func_name);
fprintf(dump_file, "===============================================================\n");
// loop for each clone func
cgraph_node *node;
FOR_EACH_FUNCTION(node) {
if (!node->decl || !DECL_NAME(node->decl)) continue;
const char *name = IDENTIFIER_POINTER(DECL_NAME(node->decl));
if (strncmp(name, func_name, strlen(func_name)) == 0 &&
name[strlen(func_name)] == '.' &&
!strstr(name, ".resolver")) {
fprintf(dump_file, "Cloned function: %s\n", name);
}
}
fprintf(dump_file, "---------------------------------------------------------------\n");
}
FOR_EACH_BB_FN(bb, fun) {
bb_cnt++;
int bb_stmt_cnt = 0;
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
gimple *stmt = gsi_stmt(gsi);
bb_stmt_cnt++;
if (dump_file) {
fprintf(dump_file, "\n GIMPLE code: %d\n", gimple_code(stmt));
fprintf(dump_file, " GIMPLE code name: %s\n", gimple_code_name[gimple_code(stmt)]);
for (unsigned int i = 0; i < gimple_num_ops(stmt); ++i) {
tree op = gimple_op(stmt, i);
if (op) {
fprintf(dump_file, " Operand[%u]: ", i);
print_generic_expr(dump_file, op, TDF_NONE);
fprintf(dump_file, "\n");
}
}
fprintf(dump_file, "---------------------------------------------------------------\n");
}
}
gimple_stmt_cnt += bb_stmt_cnt;
}
if (dump_file) {
fprintf(dump_file, "Total Basic Blocks: %d\n", bb_cnt);
fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_cnt);
fprintf(dump_file, "\n\n");
}
return 0;
}
};
} // anonymous namespace
gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
return new hxu_pass(ctxt);
}
cd ~/gcc-build-001
make -j$(nproc)
cd ~/spo600-tests/spo600/examples/test-clone
~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O2 -fdump-tree-hxu132 \
-DCLONE_ATTRIBUTE='__attribute__((target_clones("default","rng")))' \
-c clone-test-core.c -o clone-test-prune.o
cat clone-test-prune.c.270t.hxu132
result for scale samples:
the function
scale_samples
has a cloned version named scale_samples.rng
generated by the compiler using the target_clones
attributeSTEP 3: Compare the Cloned Functions
In this step, the pass generates a unique signature for each detected clone based on the number of basic blocks and GIMPLE statements. If two clones have matching signatures, they are considered the same and marked with "PRUNE"; otherwise, they differ and are labeled "NOPRUNE."
#include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "tree.h" #include "gimple.h" #include "pass_manager.h" #include "context.h" #include "diagnostic-core.h" #include "tree-pass.h" #include "ssa.h" #include "tree-pretty-print.h" #include "internal-fn.h" #include "gimple-iterator.h" #include "gimple-walk.h" #include "internal-fn.h" #include "tree-core.h" #include "basic-block.h" #include "gimple-ssa.h" #include "cgraph.h" #include "attribs.h" #include "pretty-print.h" #include "tree-inline.h" #include "intl.h" #include "dumpfile.h" #include "builtins.h" #include <map> #include <vector> #include <string> namespace { // Store function names and their GIMPLE codes std::map<std::string, std::vector<int>> function_gimple_map; const pass_data pass_data_hxu132 = { GIMPLE_PASS, "hxu132", // ← -fdump-tree-hxu132 OPTGROUP_NONE, TV_NONE, PROP_cfg, 0, 0, 0, 0, }; class pass_hxu132 : public gimple_opt_pass { public: pass_hxu132(gcc::context *ctxt) : gimple_opt_pass(pass_data_hxu132, ctxt) {} bool gate(function *) final override { return true; } unsigned int execute(function *fun) final override; static void find_and_print_cloned_functions(); }; unsigned int pass_hxu132::execute(function *fun) { int bb_count = 0; int gimple_stmt_count = 0; basic_block bb; std::string func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl)); function_gimple_map[func_name] = std::vector<int>(); if (dump_file) { fprintf(dump_file, "--------------------------------------------------------------------\n"); fprintf(dump_file, "%s\n", func_name.c_str()); fprintf(dump_file, "--------------------------------------------------------------------\n"); FOR_EACH_BB_FN(bb, fun) { bb_count++; int bb_gimple_count = 0; for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { gimple *stmt = gsi_stmt(gsi); bb_gimple_count++; function_gimple_map[func_name].push_back(gimple_code(stmt)); fprintf(dump_file, "\n GIMPLE code: %d\n", gimple_code(stmt)); fprintf(dump_file, " GIMPLE code name: %s\n", gimple_code_name[gimple_code(stmt)]); for (unsigned int i = 0; i < gimple_num_ops(stmt); i++) { tree op = gimple_op(stmt, i); if (op) { fprintf(dump_file, " Operand:"); print_generic_expr(dump_file, op, TDF_NONE); fprintf(dump_file, "\n"); } } fprintf(dump_file, "--------------------------------------------------------------------\n"); } gimple_stmt_count += bb_gimple_count; } fprintf(dump_file, "Total Basic Blocks: %d\n", bb_count); fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_count); } find_and_print_cloned_functions(); return 0; } void pass_hxu132::find_and_print_cloned_functions() { if (dump_file) { fprintf(dump_file, "\n===== Finding Cloned Functions =====\n"); } for (const auto& base_entry : function_gimple_map) { const std::string& base_name = base_entry.first; bool found_related_function = false; for (const auto& entry : function_gimple_map) { const std::string& func_name = entry.first; if (func_name.find(base_name + ".") == 0 && func_name.find(".resolver") == std::string::npos) { if (!found_related_function) { if (dump_file) { fprintf(dump_file, "\nBase function: %s\n", base_name.c_str()); fprintf(dump_file, "GIMPLE Codes: "); for (int code : base_entry.second) { fprintf(dump_file, "%d ", code); } fprintf(dump_file, "\n---------------------------------------\n"); } found_related_function = true; } bool is_same = true; const auto& base_codes = base_entry.second; const auto& cloned_codes = entry.second; if (base_codes.size() == cloned_codes.size()) { for (size_t i = 0; i < base_codes.size(); ++i) { if (base_codes[i] != cloned_codes[i]) { is_same = false; break; } } } else { is_same = false; } if (dump_file) { fprintf(dump_file, "\nRelated cloned function: %s\n", func_name.c_str()); fprintf(dump_file, "GIMPLE Codes: "); for (int code : cloned_codes) { fprintf(dump_file, "%d ", code); } fprintf(dump_file, "\n"); if (is_same) { fprintf(dump_file, "PRUNE: %s\n", base_name.c_str()); } else { fprintf(dump_file, "NOPRUNE: %s\n", base_name.c_str()); } fprintf(dump_file, "---------------------------------------\n"); } } } } } } // anonymous namespace gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) { return new pass_hxu132(ctxt); }
cd ~/gcc-build-001
find . -name config.cache -exec rm {} \;
make -j$(nproc)
grep -A20 "Finding Cloned Functions" clone-test-core.c.*.hxu132
scale_samples.rng as a clone of scale_samples and determined that their GIMPLE codes are identical:
#NOPRUNE
revise:
__attribute__((target_clones("default", "rng", "sve2"), __used__))
void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {
for (int x = 0; x < cnt; x++) {
#ifdef __ARM_FEATURE_SVE2__
out[x] = ((((int32_t)in[x]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16) + 1;
#else
out[x] = ((((int32_t)in[x]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16);
#endif
}
}
~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O0 -fdump-tree-hxu132 \
-DCLONE_ATTRIBUTE='__attribute__((target_clones("default","sve2")))' \
-c clone-test-core.c -o noprune.o
Step 4: Compare the function's GIMPLE Code
#include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "tree.h" #include "gimple.h" #include "pass_manager.h" #include "context.h" #include "diagnostic-core.h" #include "tree-pass.h" #include "ssa.h" #include "tree-pretty-print.h" #include "internal-fn.h" #include "gimple-iterator.h" #include "gimple-walk.h" #include "internal-fn.h" #include "tree-core.h" #include "basic-block.h" #include "gimple-ssa.h" #include "cgraph.h" #include "attribs.h" #include "pretty-print.h" #include "tree-inline.h" #include "intl.h" #include "dumpfile.h" #include "builtins.h" #include <map> #include <vector> #include <string> namespace { // Store function names and their GIMPLE instruction codes std::map<std::string, std::vector<int>> function_gimple_map; // Define pass metadata const pass_data pass_data_hxu132 = { GIMPLE_PASS, // Pass type "hxu132", // Pass name used with -fdump-tree-hxu132 OPTGROUP_NONE, TV_NONE, PROP_cfg, 0, 0, 0, 0 }; // Define the pass class class pass_hxu132 : public gimple_opt_pass { public: pass_hxu132(gcc::context *ctxt) : gimple_opt_pass(pass_data_hxu132, ctxt) {} bool gate(function *) final override { return true; // Always run } unsigned int execute(function *fun) final override; static void find_and_print_cloned_functions(); }; // Main logic of the pass: collect GIMPLE codes from each function unsigned int pass_hxu132::execute(function *fun) { int bb_count = 0; int gimple_stmt_count = 0; basic_block bb; std::string func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl)); function_gimple_map[func_name] = std::vector<int>(); if (dump_file) { fprintf(dump_file, "--------------------------------------------------------------------\n"); fprintf(dump_file, "%s\n", func_name.c_str()); fprintf(dump_file, "--------------------------------------------------------------------\n"); FOR_EACH_BB_FN(bb, fun) { bb_count++; int bb_gimple_count = 0; for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { gimple *stmt = gsi_stmt(gsi); bb_gimple_count++; // Save GIMPLE code of the statement function_gimple_map[func_name].push_back(gimple_code(stmt)); // Print statement info to dump file fprintf(dump_file, "\n GIMPLE code: %d\n", gimple_code(stmt)); fprintf(dump_file, " GIMPLE code name: %s\n", gimple_code_name[gimple_code(stmt)]); for (unsigned int i = 0; i < gimple_num_ops(stmt); i++) { tree op = gimple_op(stmt, i); if (op) { fprintf(dump_file, " Operand:"); print_generic_expr(dump_file, op, TDF_NONE); fprintf(dump_file, "\n"); } } fprintf(dump_file, "--------------------------------------------------------------------\n"); } gimple_stmt_count += bb_gimple_count; } fprintf(dump_file, "Total Basic Blocks: %d\n", bb_count); fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_count); } find_and_print_cloned_functions(); return 0; } // Identify and compare cloned versions of functions void pass_hxu132::find_and_print_cloned_functions() { if (dump_file) { fprintf(dump_file, "\n===== Finding Cloned Functions =====\n"); } for (const auto& base_entry : function_gimple_map) { const std::string& base_name = base_entry.first; bool found_related_function = false; for (const auto& entry : function_gimple_map) { const std::string& func_name = entry.first; // Check if it's a clone of base function if (func_name.find(base_name + ".") == 0 && func_name.find(".resolver") == std::string::npos) { if (!found_related_function) { if (dump_file) { fprintf(dump_file, "\nBase function: %s\n", base_name.c_str()); fprintf(dump_file, "GIMPLE Codes: "); for (int code : base_entry.second) { fprintf(dump_file, "%d ", code); } fprintf(dump_file, "\n---------------------------------------\n"); } found_related_function = true; } bool is_same = true; const auto& base_codes = base_entry.second; const auto& cloned_codes = entry.second; // Compare GIMPLE code sequences if (base_codes.size() == cloned_codes.size()) { for (size_t i = 0; i < base_codes.size(); ++i) { if (base_codes[i] != cloned_codes[i]) { is_same = false; break; } } } else { is_same = false; } // Print results if (dump_file) { fprintf(dump_file, "\nRelated cloned function: %s\n", func_name.c_str()); fprintf(dump_file, "GIMPLE Codes: "); for (int code : cloned_codes) { fprintf(dump_file, "%d ", code); } fprintf(dump_file, "\n"); if (is_same) { fprintf(dump_file, "Result: PRUNE (Codes are identical)\n"); } else { fprintf(dump_file, "Result: NOPRUNE (Codes differ)\n"); } fprintf(dump_file, "---------------------------------------\n"); } } } } } } // anonymous namespace // Factory method for creating the pass // This must match the name used in pass-instances.def // e.g., make_pass_hxu(ctxt) gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) { return new pass_hxu132(ctxt); }
touch ~/SomeLocalDir/gcc/hxu_pass.cc
make -j$(nproc)
~/spo600-tests/spo600/examples/test-clone/
~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O2 -fdump-tree-hxu132 \
-DCLONE_ATTRIBUTE='__attribute__((target_clones("default","sve2")))' \
-c ~/spo600-tests/spo600/examples/test-clone/clone-test-core.c -o mytest.o
reflection:
Dump files were not always generated as expected, maydue to incorrect optimization levels, improper dump flag usage, or forgetting to rebuild the compiler. Clone detection relies heavily on matching function names (e.g., scale_samples vs. scale_samples.sve2), which can fail if naming conventions change or .resolver suffixes are not handled correctly. Another limitation is that GIMPLE statement comparisons are based only on gimple_code() values, which do not capture operand-level differences or structural changes, leading to inaccurate classification in some cases. The process successfully collected GIMPLE statistics, printed detailed statement information, and identified PRUNE vs. NOPRUNE clone relationships. I will do improvements include enhancing the statement comparison logic to consider operands and control flow, making the clone detection logic , and adding automated scripts for easier testing. Some features are not yet implemented, need more time to complete.
Comments
Post a Comment