Project Stage 2: Clone-Pruning Analysis Pass

In this stage, we will implement a custom GCC pass that analyzes cloned functions generated through Function Multi-Versioning (FMV), determines whether they are substantially the same, and emits pruning recommendations. This is part of a broader effort toward Automatic Function Multi-Versioning (AFMV).


First ,we copy and extract the SPO600 function multiversioning test archive:

mkdir ~/spo600-tests

cp /public/spo600-test-clone.tgz ~/spo600-tests/

cd ~/spo600-tests

tar -xvzf spo600-test-clone.tgz



Step1:Print detailed information about the GIMPLE statement number, statement type name (such as GIMPLE_ASSIGN), and operands.

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "cfg.h"
#include "context.h"
#include "dumpfile.h"

#include "tree-pretty-print.h"
#include "gimple-ssa.h"

namespace {

const pass_data hxu_pass_data = {
    GIMPLE_PASS,          // Type of pass
    "hxu132",             // Pass name, used with -fdump-tree-hxu132
    OPTGROUP_NONE,
    TV_NONE,
    PROP_cfg,
    0, 0, 0, 0
};

class hxu_pass : public gimple_opt_pass {
public:
    hxu_pass(gcc::context *ctxt) : gimple_opt_pass(hxu_pass_data, ctxt) {}

    bool gate(function *) override {
        return true; // Always run the pass
    }

    unsigned int execute(function *fun) override {
        int bb_cnt = 0, gimple_stmt_cnt = 0;
        basic_block bb;

        if (dump_file) {
            fprintf(dump_file, "===============================================================\n");
            fprintf(dump_file, "Function: %s\n", function_name(fun));
            fprintf(dump_file, "===============================================================\n");
        }

        FOR_EACH_BB_FN(bb, fun) {
            bb_cnt++;
            int bb_stmt_cnt = 0;

            for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
                gimple *stmt = gsi_stmt(gsi);
                bb_stmt_cnt++;

                if (dump_file) {
                    fprintf(dump_file, "\n  GIMPLE code:       %d\n", gimple_code(stmt));
                    fprintf(dump_file, "  GIMPLE code name:  %s\n", gimple_code_name[gimple_code(stmt)]);

       
                    for (unsigned int i = 0; i < gimple_num_ops(stmt); ++i) {
                        tree op = gimple_op(stmt, i);
                        if (op) {
                            fprintf(dump_file, "    Operand[%u]: ", i);
                            print_generic_expr(dump_file, op, TDF_NONE);
                            fprintf(dump_file, "\n");
                        }
                    }

                    fprintf(dump_file, "---------------------------------------------------------------\n");
                }
            }

            gimple_stmt_cnt += bb_stmt_cnt;
        }

        if (dump_file) {
            fprintf(dump_file, "Total Basic Blocks: %d\n", bb_cnt);
            fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_cnt);
            fprintf(dump_file, "\n\n");
        }

        return 0;
    }
};

} // anonymous namespace


gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
    return new hxu_pass(ctxt);
}
cd ~/gcc-build-001
make -j$(nproc)
./xgcc -B. -O0 -fdump-tree-hxu132 -c hello.c

cat hello.c.270t.hxu132










Step 2:Find clone function

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "cfg.h"
#include "context.h"
#include "dumpfile.h"
#include "tree-pretty-print.h"
#include "gimple-ssa.h"

namespace {

const pass_data hxu_pass_data = {
    GIMPLE_PASS,
    "hxu132",       // Use -fdump-tree-hxu132 to dump
    OPTGROUP_NONE,
    TV_NONE,
    PROP_cfg,
    0, 0, 0, 0
};

class hxu_pass : public gimple_opt_pass {
public:
    hxu_pass(gcc::context *ctxt) : gimple_opt_pass(hxu_pass_data, ctxt) {}

    bool gate(function *) override {
        return true;
    }

    unsigned int execute(function *fun) override {
        int bb_cnt = 0, gimple_stmt_cnt = 0;
        basic_block bb;

        if (dump_file) {
            const char* func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl));
            fprintf(dump_file, "===============================================================\n");
            fprintf(dump_file, "Function: %s\n", func_name);
            fprintf(dump_file, "===============================================================\n");

            // loop for each clone func
            cgraph_node *node;
            FOR_EACH_FUNCTION(node) {
                if (!node->decl || !DECL_NAME(node->decl)) continue;
                const char *name = IDENTIFIER_POINTER(DECL_NAME(node->decl));
                if (strncmp(name, func_name, strlen(func_name)) == 0 &&
                    name[strlen(func_name)] == '.' &&
                    !strstr(name, ".resolver")) {
                    fprintf(dump_file, "Cloned function: %s\n", name);
                }
            }
            fprintf(dump_file, "---------------------------------------------------------------\n");
        }

        FOR_EACH_BB_FN(bb, fun) {
            bb_cnt++;
            int bb_stmt_cnt = 0;

            for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
                gimple *stmt = gsi_stmt(gsi);
                bb_stmt_cnt++;

                if (dump_file) {
                    fprintf(dump_file, "\n  GIMPLE code:       %d\n", gimple_code(stmt));
                    fprintf(dump_file, "  GIMPLE code name:  %s\n", gimple_code_name[gimple_code(stmt)]);

                    for (unsigned int i = 0; i < gimple_num_ops(stmt); ++i) {
                        tree op = gimple_op(stmt, i);
                        if (op) {
                            fprintf(dump_file, "    Operand[%u]: ", i);
                            print_generic_expr(dump_file, op, TDF_NONE);
                            fprintf(dump_file, "\n");
                        }
                    }

                    fprintf(dump_file, "---------------------------------------------------------------\n");
                }
            }

            gimple_stmt_cnt += bb_stmt_cnt;
        }

        if (dump_file) {
            fprintf(dump_file, "Total Basic Blocks: %d\n", bb_cnt);
            fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_cnt);
            fprintf(dump_file, "\n\n");
        }

        return 0;
    }
};

} // anonymous namespace

gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
    return new hxu_pass(ctxt);
}
cd ~/gcc-build-001
make -j$(nproc)

cd ~/spo600-tests/spo600/examples/test-clone

~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O2 -fdump-tree-hxu132 \
  -DCLONE_ATTRIBUTE='__attribute__((target_clones("default","rng")))' \
  -c clone-test-core.c -o clone-test-prune.o

cat clone-test-prune.c.270t.hxu132


result for scale samples:
the function scale_samples has a cloned version named scale_samples.rng generated by the compiler using the target_clones attribute


















STEP 3: Compare the Cloned Functions

In this step, the pass generates a unique signature for each detected clone based on the number of basic blocks and GIMPLE statements. If two clones have matching signatures, they are considered the same and marked with "PRUNE"; otherwise, they differ and are labeled "NOPRUNE."
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "pass_manager.h"
#include "context.h"
#include "diagnostic-core.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "tree-core.h"
#include "basic-block.h"

#include "gimple-ssa.h"
#include "cgraph.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "dumpfile.h"
#include "builtins.h"

#include <map>
#include <vector>
#include <string>

namespace {

// Store function names and their GIMPLE codes
std::map<std::string, std::vector<int>> function_gimple_map;

const pass_data pass_data_hxu132 = {
    GIMPLE_PASS,
    "hxu132", // ← -fdump-tree-hxu132
    OPTGROUP_NONE,
    TV_NONE,
    PROP_cfg,
    0,
    0,
    0,
    0,
};

class pass_hxu132 : public gimple_opt_pass {
public:
    pass_hxu132(gcc::context *ctxt)
        : gimple_opt_pass(pass_data_hxu132, ctxt) {}

    bool gate(function *) final override {
        return true;
    }

    unsigned int execute(function *fun) final override;
    static void find_and_print_cloned_functions();
};

unsigned int pass_hxu132::execute(function *fun) {
    int bb_count = 0;
    int gimple_stmt_count = 0;
    basic_block bb;

    std::string func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl));
    function_gimple_map[func_name] = std::vector<int>();

    if (dump_file) {
        fprintf(dump_file, "--------------------------------------------------------------------\n");
        fprintf(dump_file, "%s\n", func_name.c_str());
        fprintf(dump_file, "--------------------------------------------------------------------\n");

        FOR_EACH_BB_FN(bb, fun) {
            bb_count++;
            int bb_gimple_count = 0;

            for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
                gimple *stmt = gsi_stmt(gsi);
                bb_gimple_count++;

                function_gimple_map[func_name].push_back(gimple_code(stmt));

                fprintf(dump_file, "\n  GIMPLE code:       %d\n", gimple_code(stmt));
                fprintf(dump_file, "  GIMPLE code name:  %s\n", gimple_code_name[gimple_code(stmt)]);

                for (unsigned int i = 0; i < gimple_num_ops(stmt); i++) {
                    tree op = gimple_op(stmt, i);
                    if (op) {
                        fprintf(dump_file, "  Operand:");
                        print_generic_expr(dump_file, op, TDF_NONE);
                        fprintf(dump_file, "\n");
                    }
                }

                fprintf(dump_file, "--------------------------------------------------------------------\n");
            }

            gimple_stmt_count += bb_gimple_count;
        }

        fprintf(dump_file, "Total Basic Blocks: %d\n", bb_count);
        fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_count);
    }

    find_and_print_cloned_functions();
    return 0;
}

void pass_hxu132::find_and_print_cloned_functions() {
    if (dump_file) {
        fprintf(dump_file, "\n===== Finding Cloned Functions =====\n");
    }

    for (const auto& base_entry : function_gimple_map) {
        const std::string& base_name = base_entry.first;
        bool found_related_function = false;

        for (const auto& entry : function_gimple_map) {
            const std::string& func_name = entry.first;

            if (func_name.find(base_name + ".") == 0 &&
                func_name.find(".resolver") == std::string::npos) {
                if (!found_related_function) {
                    if (dump_file) {
                        fprintf(dump_file, "\nBase function: %s\n", base_name.c_str());
                        fprintf(dump_file, "GIMPLE Codes: ");
                        for (int code : base_entry.second) {
                            fprintf(dump_file, "%d ", code);
                        }
                        fprintf(dump_file, "\n---------------------------------------\n");
                    }
                    found_related_function = true;
                }

                bool is_same = true;
                const auto& base_codes = base_entry.second;
                const auto& cloned_codes = entry.second;

                if (base_codes.size() == cloned_codes.size()) {
                    for (size_t i = 0; i < base_codes.size(); ++i) {
                        if (base_codes[i] != cloned_codes[i]) {
                            is_same = false;
                            break;
                        }
                    }
                } else {
                    is_same = false;
                }

                if (dump_file) {
                    fprintf(dump_file, "\nRelated cloned function: %s\n", func_name.c_str());
                    fprintf(dump_file, "GIMPLE Codes: ");
                    for (int code : cloned_codes) {
                        fprintf(dump_file, "%d ", code);
                    }
                    fprintf(dump_file, "\n");

                    if (is_same) {
                        fprintf(dump_file, "PRUNE: %s\n", base_name.c_str());
                    } else {
                        fprintf(dump_file, "NOPRUNE: %s\n", base_name.c_str());
                    }
                    fprintf(dump_file, "---------------------------------------\n");
                }
            }
        }
    }
}

} // anonymous namespace

gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
    return new pass_hxu132(ctxt);
}
cd ~/gcc-build-001
find . -name config.cache -exec rm {} \;
make -j$(nproc)

grep -A20 "Finding Cloned Functions" clone-test-core.c.*.hxu132

scale_samples.rng as a clone of scale_samples and determined that their GIMPLE codes are identical:












#NOPRUNE 
revise:
__attribute__((target_clones("default", "rng", "sve2"), __used__))
void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {
    for (int x = 0; x < cnt; x++) {
#ifdef __ARM_FEATURE_SVE2__
        out[x] = ((((int32_t)in[x]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16) + 1;
#else
        out[x] = ((((int32_t)in[x]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16);
#endif
    }
}

~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O0 -fdump-tree-hxu132 \
  -DCLONE_ATTRIBUTE='__attribute__((target_clones("default","sve2")))' \
  -c clone-test-core.c -o noprune.o





Step 4: Compare the function's GIMPLE Code
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "pass_manager.h"
#include "context.h"
#include "diagnostic-core.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "tree-core.h"
#include "basic-block.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "dumpfile.h"
#include "builtins.h"

#include <map>
#include <vector>
#include <string>

namespace {

// Store function names and their GIMPLE instruction codes
std::map<std::string, std::vector<int>> function_gimple_map;

// Define pass metadata
const pass_data pass_data_hxu132 = {
    GIMPLE_PASS,          // Pass type
    "hxu132",             // Pass name used with -fdump-tree-hxu132
    OPTGROUP_NONE,        
    TV_NONE,              
    PROP_cfg,             
    0,                    
    0,                   
    0,                    
    0                     
};

// Define the pass class
class pass_hxu132 : public gimple_opt_pass {
public:
    pass_hxu132(gcc::context *ctxt) : gimple_opt_pass(pass_data_hxu132, ctxt) {}

    bool gate(function *) final override {
        return true;  // Always run
    }

    unsigned int execute(function *fun) final override;
    static void find_and_print_cloned_functions();
};

// Main logic of the pass: collect GIMPLE codes from each function
unsigned int pass_hxu132::execute(function *fun) {
    int bb_count = 0;
    int gimple_stmt_count = 0;
    basic_block bb;

    std::string func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl));
    function_gimple_map[func_name] = std::vector<int>();

    if (dump_file) {
        fprintf(dump_file, "--------------------------------------------------------------------\n");
        fprintf(dump_file, "%s\n", func_name.c_str());
        fprintf(dump_file, "--------------------------------------------------------------------\n");

        FOR_EACH_BB_FN(bb, fun) {
            bb_count++;
            int bb_gimple_count = 0;

            for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
                gimple *stmt = gsi_stmt(gsi);
                bb_gimple_count++;

                // Save GIMPLE code of the statement
                function_gimple_map[func_name].push_back(gimple_code(stmt));

                // Print statement info to dump file
                fprintf(dump_file, "\n  GIMPLE code:       %d\n", gimple_code(stmt));
                fprintf(dump_file, "  GIMPLE code name:  %s\n", gimple_code_name[gimple_code(stmt)]);

                for (unsigned int i = 0; i < gimple_num_ops(stmt); i++) {
                    tree op = gimple_op(stmt, i);
                    if (op) {
                        fprintf(dump_file, "  Operand:");
                        print_generic_expr(dump_file, op, TDF_NONE);
                        fprintf(dump_file, "\n");
                    }
                }

                fprintf(dump_file, "--------------------------------------------------------------------\n");
            }

            gimple_stmt_count += bb_gimple_count;
        }

        fprintf(dump_file, "Total Basic Blocks: %d\n", bb_count);
        fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_count);
    }

    find_and_print_cloned_functions();
    return 0;
}

// Identify and compare cloned versions of functions
void pass_hxu132::find_and_print_cloned_functions() {
    if (dump_file) {
        fprintf(dump_file, "\n===== Finding Cloned Functions =====\n");
    }

    for (const auto& base_entry : function_gimple_map) {
        const std::string& base_name = base_entry.first;
        bool found_related_function = false;

        for (const auto& entry : function_gimple_map) {
            const std::string& func_name = entry.first;

            // Check if it's a clone of base function
            if (func_name.find(base_name + ".") == 0 &&
                func_name.find(".resolver") == std::string::npos) {

                if (!found_related_function) {
                    if (dump_file) {
                        fprintf(dump_file, "\nBase function: %s\n", base_name.c_str());
                        fprintf(dump_file, "GIMPLE Codes: ");
                        for (int code : base_entry.second) {
                            fprintf(dump_file, "%d ", code);
                        }
                        fprintf(dump_file, "\n---------------------------------------\n");
                    }
                    found_related_function = true;
                }

                bool is_same = true;
                const auto& base_codes = base_entry.second;
                const auto& cloned_codes = entry.second;

                // Compare GIMPLE code sequences
                if (base_codes.size() == cloned_codes.size()) {
                    for (size_t i = 0; i < base_codes.size(); ++i) {
                        if (base_codes[i] != cloned_codes[i]) {
                            is_same = false;
                            break;
                        }
                    }
                } else {
                    is_same = false;
                }

                // Print results
                if (dump_file) {
                    fprintf(dump_file, "\nRelated cloned function: %s\n", func_name.c_str());
                    fprintf(dump_file, "GIMPLE Codes: ");
                    for (int code : cloned_codes) {
                        fprintf(dump_file, "%d ", code);
                    }
                    fprintf(dump_file, "\n");

                    if (is_same) {
                        fprintf(dump_file, "Result: PRUNE (Codes are identical)\n");
                    } else {
                        fprintf(dump_file, "Result: NOPRUNE (Codes differ)\n");
                    }
                    fprintf(dump_file, "---------------------------------------\n");
                }
            }
        }
    }
}

} // anonymous namespace

// Factory method for creating the pass
// This must match the name used in pass-instances.def
// e.g., make_pass_hxu(ctxt)
gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
    return new pass_hxu132(ctxt);
}
touch ~/SomeLocalDir/gcc/hxu_pass.cc
make -j$(nproc)

~/spo600-tests/spo600/examples/test-clone/

~/gcc-build-001/gcc/xgcc -B ~/gcc-build-001/gcc -O2 -fdump-tree-hxu132 \
  -DCLONE_ATTRIBUTE='__attribute__((target_clones("default","sve2")))' \
  -c ~/spo600-tests/spo600/examples/test-clone/clone-test-core.c -o mytest.o










reflection:

Dump files were not always generated as expected, maydue to incorrect optimization levels, improper dump flag usage, or forgetting to rebuild the compiler. Clone detection relies heavily on matching function names (e.g., scale_samples vs. scale_samples.sve2), which can fail if naming conventions change or .resolver suffixes are not handled correctly. Another limitation is that GIMPLE statement comparisons are based only on gimple_code() values, which do not capture operand-level differences or structural changes, leading to inaccurate classification in some cases. The process successfully collected GIMPLE statistics, printed detailed statement information, and identified PRUNE vs. NOPRUNE clone relationships. I will do improvements  include enhancing the statement comparison logic to consider operands and control flow, making the clone detection logic , and adding automated scripts for easier testing. Some features are not yet implemented, need more time to complete.




Comments

Popular posts from this blog

(lab1)6502 Assembly Language Lab

(Lab4) GCC Build Lab

(lab2)6502 Math Lab