Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6552,8 +6552,11 @@ static Function* gen_cfun_wrapper(
ctx.builder.ClearInsertionPoint();

if (aliasname) {
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
GlobalValue::ExternalLinkage, aliasname, cw, M);
if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
}
}

if (nest) {
Expand Down
1 change: 1 addition & 0 deletions src/llvm-multiversioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
trampoline->removeFnAttr("julia.mv.reloc");
trampoline->removeFnAttr("julia.mv.clones");
trampoline->addFnAttr("julia.mv.alias");
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
alias->eraseFromParent();

uint32_t id;
Expand Down
51 changes: 48 additions & 3 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1871,12 +1871,55 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
return res;
}

#ifndef __clang_gcanalyzer__
std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
if (image_targets.empty())
jl_error("No targets specified");
std::vector<jl_target_spec_t> res;
for (auto &target: jit_targets) {
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
auto &features0 = image_targets[t.base].en.features;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
#ifdef _CPU_ARM_
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
static constexpr uint32_t clone_simd[] = {Feature::neon};
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
#endif
}
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1896,6 +1939,8 @@ std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif


extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
Expand Down
21 changes: 18 additions & 3 deletions src/processor_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,26 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
return res;
}

#ifndef __clang_gcanalyzer__
extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<1>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}
auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
t.en.flags |= JL_TARGET_CLONE_ALL;
}
if (image_targets.empty())
jl_error("No image targets found");
std::vector<jl_target_spec_t> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
jl_target_spec_t ele;
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
ele.data = serialize_target_data(target.name, target.en.features,
Expand All @@ -161,6 +175,7 @@ extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
}
return res;
}
#endif

JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
{
Expand Down
70 changes: 66 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1095,13 +1095,74 @@ extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_di
{feature_masks, 0}, {{}, 0}, 0});
return res;
}

#ifndef __clang_gcanalyzer__
extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
{
if (jit_targets.empty())
jl_error("JIT targets not initialized");
auto &cmdline = get_cmdline_targets();
check_cmdline(cmdline, true);
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
for (auto &arg: cmdline) {
auto data = arg_target_data(arg, image_targets.empty());
image_targets.push_back(std::move(data));
}

auto ntargets = image_targets.size();
// Now decide the clone condition.
for (size_t i = 1; i < ntargets; i++) {
auto &t = image_targets[i];
if (t.en.flags & JL_TARGET_CLONE_ALL)
continue;
// Always clone when code checks CPU features
t.en.flags |= JL_TARGET_CLONE_CPU;
// The most useful one in general...
t.en.flags |= JL_TARGET_CLONE_LOOP;
auto &features0 = image_targets[t.base].en.features;
// Special case for KNL/KNM since they're so different
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
if ((t.name == "knl" || t.name == "knm") &&
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
t.en.flags |= JL_TARGET_CLONE_ALL;
break;
}
}
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
Feature::sse41, Feature::sse42,
Feature::avx, Feature::avx2,
Feature::vaes, Feature::vpclmulqdq,
Feature::sse4a, Feature::avx512f,
Feature::avx512dq, Feature::avx512ifma,
Feature::avx512pf, Feature::avx512er,
Feature::avx512cd, Feature::avx512bw,
Feature::avx512vl, Feature::avx512vbmi,
Feature::avx512vpopcntdq, Feature::avxvnni,
Feature::avx512vbmi2, Feature::avx512vnni,
Feature::avx512bitalg, Feature::avx512bf16,
Feature::avx512vp2intersect, Feature::avx512fp16};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
break;
}
}
for (auto fe: clone_simd) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_SIMD;
break;
}
}
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
for (auto fe: clone_fp16) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
break;
}
}
}
if (image_targets.empty())
jl_error("No targets specified");
std::vector<jl_target_spec_t> res;
for (auto &target: jit_targets) {
for (auto &target: image_targets) {
auto features_en = target.en.features;
auto features_dis = target.dis.features;
for (auto &fename: feature_names) {
Expand All @@ -1121,6 +1182,7 @@ extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(
}
return res;
}
#endif

extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
{
Expand Down