Project Stage III: Tidy & Wrap
In this Stage III we focuse on enhanced testing rather than enabling full AFMV cloning.
revised code that analyze multiple cloned functions and generating PRUNE
/NOPRUNE
diagnostics for each clone set.
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "pass_manager.h"
#include "context.h"
#include "diagnostic-core.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-core.h"
#include "basic-block.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "dumpfile.h"
#include "builtins.h"
#include <map>
#include <vector>
#include <string>
namespace {
// Store all functions and their GIMPLE instruction code sequences
std::map<std::string, std::vector<int>> function_gimple_map;
// Define GCC pass metadata
const pass_data pass_data_hxu132 = {
GIMPLE_PASS,
"hxu132", // name used with -fdump-tree-hxu132
OPTGROUP_NONE,
TV_NONE,
PROP_cfg,
0,
0,
0,
0,
};
// Define pass class inheriting from gimple_opt_pass
class pass_hxu132 : public gimple_opt_pass {
public:
pass_hxu132(gcc::context *ctxt) : gimple_opt_pass(pass_data_hxu132, ctxt) {}
bool gate(function *) final override {
return true; // Always run
}
unsigned int execute(function *fun) final override;
static void find_and_print_cloned_functions();
};
// This method runs on each function
unsigned int pass_hxu132::execute(function *fun) {
int bb_count = 0;
int gimple_stmt_count = 0;
basic_block bb;
std::string func_name = IDENTIFIER_POINTER(DECL_NAME(fun->decl));
function_gimple_map[func_name] = std::vector<int>();
if (dump_file) {
fprintf(dump_file, "--------------------------------------------------------------------\n");
fprintf(dump_file, "%s\n", func_name.c_str());
fprintf(dump_file, "--------------------------------------------------------------------\n");
FOR_EACH_BB_FN(bb, fun) {
bb_count++;
int bb_gimple_count = 0;
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
gimple *stmt = gsi_stmt(gsi);
bb_gimple_count++;
// Save gimple instruction code to vector
function_gimple_map[func_name].push_back(gimple_code(stmt));
fprintf(dump_file, "\n GIMPLE code: %d\n", gimple_code(stmt));
fprintf(dump_file, " GIMPLE code name: %s\n", gimple_code_name[gimple_code(stmt)]);
for (unsigned int i = 0; i < gimple_num_ops(stmt); i++) {
tree op = gimple_op(stmt, i);
if (op) {
fprintf(dump_file, " Operand:");
print_generic_expr(dump_file, op, TDF_NONE);
fprintf(dump_file, "\n");
}
}
fprintf(dump_file, "--------------------------------------------------------------------\n");
}
gimple_stmt_count += bb_gimple_count;
}
fprintf(dump_file, "Total Basic Blocks: %d\n", bb_count);
fprintf(dump_file, "Total GIMPLE Statements: %d\n", gimple_stmt_count);
}
find_and_print_cloned_functions();
return 0;
}
// Analyze and compare cloned functions with their base version
void pass_hxu132::find_and_print_cloned_functions() {
if (dump_file) {
fprintf(dump_file, "\n===== Finding Cloned Functions =====\n");
}
for (const auto& base_entry : function_gimple_map) {
const std::string& base_name = base_entry.first;
bool found_related_function = false;
for (const auto& entry : function_gimple_map) {
const std::string& func_name = entry.first;
// Identify cloned version of the base function
if (func_name.find(base_name + ".") == 0 &&
func_name.find(".resolver") == std::string::npos) {
if (!found_related_function) {
if (dump_file) {
fprintf(dump_file, "\nBase function: %s\n", base_name.c_str());
fprintf(dump_file, "GIMPLE Codes: ");
for (int code : base_entry.second) {
fprintf(dump_file, "%d ", code);
}
fprintf(dump_file, "\n---------------------------------------\n");
}
found_related_function = true;
}
const auto& base_codes = base_entry.second;
const auto& clone_codes = entry.second;
bool is_same = base_codes == clone_codes;
if (dump_file) {
fprintf(dump_file, "\nRelated cloned function: %s\n", func_name.c_str());
fprintf(dump_file, "GIMPLE Codes: ");
for (int code : clone_codes) {
fprintf(dump_file, "%d ", code);
}
fprintf(dump_file, "\n");
fprintf(dump_file, "Result: %s (%s)\n",
is_same ? "PRUNE" : "NOPRUNE",
is_same ? "Codes are identical" : "Codes differ");
fprintf(dump_file, "---------------------------------------\n");
}
}
}
}
}
} // end anonymous namespace
gimple_opt_pass *make_pass_hxu(gcc::context *ctxt) {
return new pass_hxu132(ctxt);
}
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "vol.h"
// Clone Set 1 — PRUNE: same logic
__attribute__((target_clones("default", "sve2"), __used__))
void add_samples(int16_t *in, int16_t *out, int cnt) {
for (int i = 0; i < cnt; i++) {
out[i] = in[i] + 1;
}
}
// Clone Set 2 — NOPRUNE: slightly different logic
__attribute__((target_clones("default", "sve2"), __used__))
void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {
#ifdef __ARM_FEATURE_SVE2__
for (int i = 0; i < cnt; i++) {
out[i] = (((int32_t)in[i]) * ((int32_t)(32767 * volume / 100) << 1) >> 16) + 1; // SVE2 clone: extra +1
}
#else
for (int i = 0; i < cnt; i++) {
out[i] = (((int32_t)in[i]) * ((int32_t)(32767 * volume / 100) << 1) >> 16);
}
#endif
}
int main() {
int16_t *in = (int16_t *)calloc(SAMPLES, sizeof(int16_t));
int16_t *out = (int16_t *)calloc(SAMPLES, sizeof(int16_t));
vol_createsample(in, SAMPLES);
add_samples(in, out, SAMPLES); // PRUNE
scale_samples(in, out, SAMPLES, VOLUME); // NOPRUNE
printf("Done\n");
free(in); free(out);
return 0;
}
X86:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "vol.h"
// Define architecture-aware target_clones attribute
#if defined(__aarch64__)
#define TARGET_CLONES_ATTR __attribute__((target_clones("default", "sve2"), __used__))
#elif defined(__x86_64__)
#define TARGET_CLONES_ATTR __attribute__((target_clones("default", "avx2"), __used__))
#else
#define TARGET_CLONES_ATTR __attribute__((__used__)) // fallback
#endif
// Clone Set 1 — PRUNE: identical logic
TARGET_CLONES_ATTR
void add_samples(int16_t *in, int16_t *out, int cnt) {
for (int i = 0; i < cnt; i++) {
out[i] = in[i] + 1;
}
}
// Clone Set 2 — NOPRUNE: slightly different logic between versions
TARGET_CLONES_ATTR
void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {
#if defined(__ARM_FEATURE_SVE2__) || defined(__AVX2__)
// This clone differs: extra +1
for (int i = 0; i < cnt; i++) {
out[i] = ((((int32_t)in[i]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16) + 1;
}
#else
for (int i = 0; i < cnt; i++) {
out[i] = ((((int32_t)in[i]) * ((int32_t)(32767 * volume / 100) << 1)) >> 16);
}
#endif
}
int main() {
int16_t *in = (int16_t *)calloc(SAMPLES, sizeof(int16_t));
int16_t *out = (int16_t *)calloc(SAMPLES, sizeof(int16_t));
vol_createsample(in, SAMPLES);
// Trigger both clone sets
add_samples(in, out, SAMPLES); // → PRUNE expected
scale_samples(in, out, SAMPLES, VOLUME); // → NOPRUNE expected
printf("Done\n");
free(in);
free(out);
return 0;
}
reflect:
I extended my custom GCC pass to support multiple sets of cloned functions and report PRUNE or NOPRUNE decisions for each. To verify its correctness, I created a test case that included two different clone sets—add_samples, which had identical GIMPLE logic across versions and should result in PRUNE, and scale_samples, which contained an extra arithmetic operation in the sve2 variant and was expected to yield NOPRUNE. The function clone resolver and multiple target clones were correctly recognized and distinguished by my pass. I confirmed that my logic handled multiple clone sets in the same translation unit and produced distinct results based on GIMPLE code comparisons. To fully validate my implementation, I tested the same program on two architectures: AArch64 (with -march=armv8-a+sve2) and x86_64 (with -march=native). On AArch64, I verified that function multiversioning and target-specific behavior worked correctly and triggered the correct __ARM_FEATURE_SVE2__ logic. On x86_64, I confirmed that although the sve2 target was not supported, the default version compiled and executed properly, and the pass still handled the structure of clone detection.
Comments
Post a Comment