mirror of
https://github.com/opnsense/src.git
synced 2026-05-28 04:12:45 -04:00
Vendor import of llvm trunk r256945:
https://llvm.org/svn/llvm-project/llvm/trunk@256945
This commit is contained in:
parent
84fe440ded
commit
8a6c1c25bc
244 changed files with 8769 additions and 3695 deletions
|
|
@ -26,7 +26,10 @@ Quick start
|
|||
We use here the command-line, non-interactive CMake interface.
|
||||
|
||||
#. `Download <http://www.cmake.org/cmake/resources/software.html>`_ and install
|
||||
CMake. Version 2.8.8 is the minimum required.
|
||||
CMake. Version 2.8.8 is the minimum required, but if you're using the Ninja
|
||||
backend, CMake v3.2 or newer is required to `get interactive output
|
||||
<http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20141117/244797.html>`_
|
||||
when running :doc:`Lit <CommandGuide/lit>`.
|
||||
|
||||
#. Open a shell. Your development tools must be reachable from this shell
|
||||
through the PATH environment variable.
|
||||
|
|
|
|||
|
|
@ -241,15 +241,25 @@ For example, let’s consider a C file and how it gets compiled to LLVM:
|
|||
return 13;
|
||||
}
|
||||
|
||||
The coverage mapping variable generated by Clang is:
|
||||
The coverage mapping variable generated by Clang has 3 fields:
|
||||
|
||||
* Coverage mapping header.
|
||||
|
||||
* An array of function records.
|
||||
|
||||
* Coverage mapping data which is an array of bytes. Zero paddings are added at the end to force 8 byte alignment.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
@__llvm_coverage_mapping = internal constant { i32, i32, i32, i32, [2 x { i8*, i32, i32 }], [40 x i8] }
|
||||
{ i32 2, ; The number of function records
|
||||
i32 20, ; The length of the string that contains the encoded translation unit filenames
|
||||
i32 20, ; The length of the string that contains the encoded coverage mapping data
|
||||
i32 0, ; Coverage mapping format version
|
||||
@__llvm_coverage_mapping = internal constant { { i32, i32, i32, i32 }, [2 x { i8*, i32, i32 }], [40 x i8] }
|
||||
{
|
||||
{ i32, i32, i32, i32 } ; Coverage map header
|
||||
{
|
||||
i32 2, ; The number of function records
|
||||
i32 20, ; The length of the string that contains the encoded translation unit filenames
|
||||
i32 20, ; The length of the string that contains the encoded coverage mapping data
|
||||
i32 0, ; Coverage mapping format version
|
||||
},
|
||||
[2 x { i8*, i32, i32 }] [ ; Function records
|
||||
{ i8*, i32, i32 } { i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), ; Function's name
|
||||
i32 3, ; Function's name length
|
||||
|
|
@ -262,12 +272,18 @@ The coverage mapping variable generated by Clang is:
|
|||
[40 x i8] c"..." ; Encoded data (dissected later)
|
||||
}, section "__llvm_covmap", align 8
|
||||
|
||||
Version:
|
||||
--------
|
||||
Coverage Mapping Header:
|
||||
------------------------
|
||||
|
||||
The coverage mapping version number can have the following values:
|
||||
The coverage mapping header has the following fields:
|
||||
|
||||
* 0 — The first (current) version of the coverage mapping format.
|
||||
* The number of function records.
|
||||
|
||||
* The length of the string in the third field of *__llvm_coverage_mapping* that contains the encoded translation unit filenames.
|
||||
|
||||
* The length of the string in the third field of *__llvm_coverage_mapping* that contains the encoded coverage mapping data.
|
||||
|
||||
* The format version. 0 is the first (current) version of the coverage mapping format.
|
||||
|
||||
.. _function records:
|
||||
|
||||
|
|
@ -331,7 +347,7 @@ IR for the `coverage mapping sample`_ that was shown earlier:
|
|||
* The length of the substring that contains the encoded coverage mapping data
|
||||
for the first function is the value of the third field in the first
|
||||
structure in an array of `function records`_ stored in the
|
||||
fifth field of the *__llvm_coverage_mapping* structure, which is the 9.
|
||||
third field of the *__llvm_coverage_mapping* structure, which is the 9.
|
||||
Therefore, the coverage mapping for the first function record is encoded
|
||||
in this string:
|
||||
|
||||
|
|
@ -351,7 +367,7 @@ IR for the `coverage mapping sample`_ that was shown earlier:
|
|||
| ``0x01`` | The number of mapping regions that are stored in an array for the function's file id #0. |
|
||||
+----------+-------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``0x01`` | The coverage mapping counter for the first region in this function. The value of 1 tells us that it's a coverage |
|
||||
| | mapping counter that is a reference ot the profile instrumentation counter with an index of 0. |
|
||||
| | mapping counter that is a reference to the profile instrumentation counter with an index of 0. |
|
||||
+----------+-------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``0x01`` | The starting line of the first mapping region in this function. |
|
||||
+----------+-------------------------------------------------------------------------------------------------------------------------+
|
||||
|
|
|
|||
|
|
@ -78,6 +78,8 @@ Here's the short story for getting up and running quickly with LLVM:
|
|||
|
||||
The usual build uses `CMake <CMake.html>`_. If you would rather use
|
||||
autotools, see `Building LLVM with autotools <BuildingLLVMWithAutotools.html>`_.
|
||||
Although the build is known to work with CMake >= 2.8.8, we recommend CMake
|
||||
>= v3.2, especially if you're generating Ninja build files.
|
||||
|
||||
* ``cd where you want to build llvm``
|
||||
* ``mkdir build``
|
||||
|
|
|
|||
|
|
@ -1,180 +1,180 @@
|
|||
===============================
|
||||
MCJIT Design and Implementation
|
||||
===============================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes the internal workings of the MCJIT execution
|
||||
engine and the RuntimeDyld component. It is intended as a high level
|
||||
overview of the implementation, showing the flow and interactions of
|
||||
objects throughout the code generation and dynamic loading process.
|
||||
|
||||
Engine Creation
|
||||
===============
|
||||
|
||||
In most cases, an EngineBuilder object is used to create an instance of
|
||||
the MCJIT execution engine. The EngineBuilder takes an llvm::Module
|
||||
object as an argument to its constructor. The client may then set various
|
||||
options that will later be passed along to the MCJIT engine,
|
||||
including the selection of MCJIT as the engine type to be created.
|
||||
Of particular interest is the EngineBuilder::setMCJITMemoryManager
|
||||
function. If the client does not explicitly create a memory manager at
|
||||
this time, a default memory manager (specifically SectionMemoryManager)
|
||||
will be created when the MCJIT engine is instantiated.
|
||||
|
||||
Once the options have been set, a client calls EngineBuilder::create to
|
||||
create an instance of the MCJIT engine. If the client does not use the
|
||||
form of this function that takes a TargetMachine as a parameter, a new
|
||||
TargetMachine will be created based on the target triple associated with
|
||||
the Module that was used to create the EngineBuilder.
|
||||
|
||||
.. image:: MCJIT-engine-builder.png
|
||||
|
||||
EngineBuilder::create will call the static MCJIT::createJIT function,
|
||||
passing in its pointers to the module, memory manager and target machine
|
||||
objects, all of which will subsequently be owned by the MCJIT object.
|
||||
|
||||
The MCJIT class has a member variable, Dyld, which contains an instance of
|
||||
the RuntimeDyld wrapper class. This member will be used for
|
||||
communications between MCJIT and the actual RuntimeDyldImpl object that
|
||||
gets created when an object is loaded.
|
||||
|
||||
.. image:: MCJIT-creation.png
|
||||
|
||||
Upon creation, MCJIT holds a pointer to the Module object that it received
|
||||
from EngineBuilder but it does not immediately generate code for this
|
||||
module. Code generation is deferred until either the
|
||||
MCJIT::finalizeObject method is called explicitly or a function such as
|
||||
MCJIT::getPointerToFunction is called which requires the code to have been
|
||||
generated.
|
||||
|
||||
Code Generation
|
||||
===============
|
||||
|
||||
When code generation is triggered, as described above, MCJIT will first
|
||||
attempt to retrieve an object image from its ObjectCache member, if one
|
||||
has been set. If a cached object image cannot be retrieved, MCJIT will
|
||||
call its emitObject method. MCJIT::emitObject uses a local PassManager
|
||||
instance and creates a new ObjectBufferStream instance, both of which it
|
||||
passes to TargetMachine::addPassesToEmitMC before calling PassManager::run
|
||||
on the Module with which it was created.
|
||||
|
||||
.. image:: MCJIT-load.png
|
||||
|
||||
The PassManager::run call causes the MC code generation mechanisms to emit
|
||||
a complete relocatable binary object image (in either ELF or MachO
|
||||
format, depending on the target) into the ObjectBufferStream object, which
|
||||
is flushed to complete the process. If an ObjectCache is being used, the
|
||||
image will be passed to the ObjectCache here.
|
||||
|
||||
At this point, the ObjectBufferStream contains the raw object image.
|
||||
Before the code can be executed, the code and data sections from this
|
||||
image must be loaded into suitable memory, relocations must be applied and
|
||||
memory permission and code cache invalidation (if required) must be completed.
|
||||
|
||||
Object Loading
|
||||
==============
|
||||
|
||||
Once an object image has been obtained, either through code generation or
|
||||
having been retrieved from an ObjectCache, it is passed to RuntimeDyld to
|
||||
be loaded. The RuntimeDyld wrapper class examines the object to determine
|
||||
its file format and creates an instance of either RuntimeDyldELF or
|
||||
RuntimeDyldMachO (both of which derive from the RuntimeDyldImpl base
|
||||
class) and calls the RuntimeDyldImpl::loadObject method to perform the
|
||||
actual loading.
|
||||
|
||||
.. image:: MCJIT-dyld-load.png
|
||||
|
||||
RuntimeDyldImpl::loadObject begins by creating an ObjectImage instance
|
||||
from the ObjectBuffer it received. ObjectImage, which wraps the
|
||||
ObjectFile class, is a helper class which parses the binary object image
|
||||
and provides access to the information contained in the format-specific
|
||||
headers, including section, symbol and relocation information.
|
||||
|
||||
RuntimeDyldImpl::loadObject then iterates through the symbols in the
|
||||
image. Information about common symbols is collected for later use. For
|
||||
each function or data symbol, the associated section is loaded into memory
|
||||
and the symbol is stored in a symbol table map data structure. When the
|
||||
iteration is complete, a section is emitted for the common symbols.
|
||||
|
||||
Next, RuntimeDyldImpl::loadObject iterates through the sections in the
|
||||
object image and for each section iterates through the relocations for
|
||||
that section. For each relocation, it calls the format-specific
|
||||
processRelocationRef method, which will examine the relocation and store
|
||||
it in one of two data structures, a section-based relocation list map and
|
||||
an external symbol relocation map.
|
||||
|
||||
.. image:: MCJIT-load-object.png
|
||||
|
||||
When RuntimeDyldImpl::loadObject returns, all of the code and data
|
||||
sections for the object will have been loaded into memory allocated by the
|
||||
memory manager and relocation information will have been prepared, but the
|
||||
relocations have not yet been applied and the generated code is still not
|
||||
ready to be executed.
|
||||
|
||||
[Currently (as of August 2013) the MCJIT engine will immediately apply
|
||||
relocations when loadObject completes. However, this shouldn't be
|
||||
happening. Because the code may have been generated for a remote target,
|
||||
the client should be given a chance to re-map the section addresses before
|
||||
relocations are applied. It is possible to apply relocations multiple
|
||||
times, but in the case where addresses are to be re-mapped, this first
|
||||
application is wasted effort.]
|
||||
|
||||
Address Remapping
|
||||
=================
|
||||
|
||||
At any time after initial code has been generated and before
|
||||
finalizeObject is called, the client can remap the address of sections in
|
||||
the object. Typically this is done because the code was generated for an
|
||||
external process and is being mapped into that process' address space.
|
||||
The client remaps the section address by calling MCJIT::mapSectionAddress.
|
||||
This should happen before the section memory is copied to its new
|
||||
location.
|
||||
|
||||
When MCJIT::mapSectionAddress is called, MCJIT passes the call on to
|
||||
RuntimeDyldImpl (via its Dyld member). RuntimeDyldImpl stores the new
|
||||
address in an internal data structure but does not update the code at this
|
||||
time, since other sections are likely to change.
|
||||
|
||||
When the client is finished remapping section addresses, it will call
|
||||
MCJIT::finalizeObject to complete the remapping process.
|
||||
|
||||
Final Preparations
|
||||
==================
|
||||
|
||||
When MCJIT::finalizeObject is called, MCJIT calls
|
||||
RuntimeDyld::resolveRelocations. This function will attempt to locate any
|
||||
external symbols and then apply all relocations for the object.
|
||||
|
||||
External symbols are resolved by calling the memory manager's
|
||||
getPointerToNamedFunction method. The memory manager will return the
|
||||
address of the requested symbol in the target address space. (Note, this
|
||||
may not be a valid pointer in the host process.) RuntimeDyld will then
|
||||
iterate through the list of relocations it has stored which are associated
|
||||
with this symbol and invoke the resolveRelocation method which, through a
|
||||
format-specific implementation, will apply the relocation to the loaded
|
||||
section memory.
|
||||
|
||||
Next, RuntimeDyld::resolveRelocations iterates through the list of
|
||||
sections and for each section iterates through a list of relocations that
|
||||
have been saved which reference that symbol and call resolveRelocation for
|
||||
each entry in this list. The relocation list here is a list of
|
||||
relocations for which the symbol associated with the relocation is located
|
||||
in the section associated with the list. Each of these relocations will
|
||||
have a target location at which the relocation will be applied that is
|
||||
likely located in a different section.
|
||||
|
||||
.. image:: MCJIT-resolve-relocations.png
|
||||
|
||||
Once relocations have been applied as described above, MCJIT calls
|
||||
RuntimeDyld::getEHFrameSection, and if a non-zero result is returned
|
||||
passes the section data to the memory manager's registerEHFrames method.
|
||||
This allows the memory manager to call any desired target-specific
|
||||
functions, such as registering the EH frame information with a debugger.
|
||||
|
||||
Finally, MCJIT calls the memory manager's finalizeMemory method. In this
|
||||
method, the memory manager will invalidate the target code cache, if
|
||||
necessary, and apply final permissions to the memory pages it has
|
||||
allocated for code and data memory.
|
||||
|
||||
===============================
|
||||
MCJIT Design and Implementation
|
||||
===============================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes the internal workings of the MCJIT execution
|
||||
engine and the RuntimeDyld component. It is intended as a high level
|
||||
overview of the implementation, showing the flow and interactions of
|
||||
objects throughout the code generation and dynamic loading process.
|
||||
|
||||
Engine Creation
|
||||
===============
|
||||
|
||||
In most cases, an EngineBuilder object is used to create an instance of
|
||||
the MCJIT execution engine. The EngineBuilder takes an llvm::Module
|
||||
object as an argument to its constructor. The client may then set various
|
||||
options that will later be passed along to the MCJIT engine,
|
||||
including the selection of MCJIT as the engine type to be created.
|
||||
Of particular interest is the EngineBuilder::setMCJITMemoryManager
|
||||
function. If the client does not explicitly create a memory manager at
|
||||
this time, a default memory manager (specifically SectionMemoryManager)
|
||||
will be created when the MCJIT engine is instantiated.
|
||||
|
||||
Once the options have been set, a client calls EngineBuilder::create to
|
||||
create an instance of the MCJIT engine. If the client does not use the
|
||||
form of this function that takes a TargetMachine as a parameter, a new
|
||||
TargetMachine will be created based on the target triple associated with
|
||||
the Module that was used to create the EngineBuilder.
|
||||
|
||||
.. image:: MCJIT-engine-builder.png
|
||||
|
||||
EngineBuilder::create will call the static MCJIT::createJIT function,
|
||||
passing in its pointers to the module, memory manager and target machine
|
||||
objects, all of which will subsequently be owned by the MCJIT object.
|
||||
|
||||
The MCJIT class has a member variable, Dyld, which contains an instance of
|
||||
the RuntimeDyld wrapper class. This member will be used for
|
||||
communications between MCJIT and the actual RuntimeDyldImpl object that
|
||||
gets created when an object is loaded.
|
||||
|
||||
.. image:: MCJIT-creation.png
|
||||
|
||||
Upon creation, MCJIT holds a pointer to the Module object that it received
|
||||
from EngineBuilder but it does not immediately generate code for this
|
||||
module. Code generation is deferred until either the
|
||||
MCJIT::finalizeObject method is called explicitly or a function such as
|
||||
MCJIT::getPointerToFunction is called which requires the code to have been
|
||||
generated.
|
||||
|
||||
Code Generation
|
||||
===============
|
||||
|
||||
When code generation is triggered, as described above, MCJIT will first
|
||||
attempt to retrieve an object image from its ObjectCache member, if one
|
||||
has been set. If a cached object image cannot be retrieved, MCJIT will
|
||||
call its emitObject method. MCJIT::emitObject uses a local PassManager
|
||||
instance and creates a new ObjectBufferStream instance, both of which it
|
||||
passes to TargetMachine::addPassesToEmitMC before calling PassManager::run
|
||||
on the Module with which it was created.
|
||||
|
||||
.. image:: MCJIT-load.png
|
||||
|
||||
The PassManager::run call causes the MC code generation mechanisms to emit
|
||||
a complete relocatable binary object image (in either ELF or MachO
|
||||
format, depending on the target) into the ObjectBufferStream object, which
|
||||
is flushed to complete the process. If an ObjectCache is being used, the
|
||||
image will be passed to the ObjectCache here.
|
||||
|
||||
At this point, the ObjectBufferStream contains the raw object image.
|
||||
Before the code can be executed, the code and data sections from this
|
||||
image must be loaded into suitable memory, relocations must be applied and
|
||||
memory permission and code cache invalidation (if required) must be completed.
|
||||
|
||||
Object Loading
|
||||
==============
|
||||
|
||||
Once an object image has been obtained, either through code generation or
|
||||
having been retrieved from an ObjectCache, it is passed to RuntimeDyld to
|
||||
be loaded. The RuntimeDyld wrapper class examines the object to determine
|
||||
its file format and creates an instance of either RuntimeDyldELF or
|
||||
RuntimeDyldMachO (both of which derive from the RuntimeDyldImpl base
|
||||
class) and calls the RuntimeDyldImpl::loadObject method to perform the
|
||||
actual loading.
|
||||
|
||||
.. image:: MCJIT-dyld-load.png
|
||||
|
||||
RuntimeDyldImpl::loadObject begins by creating an ObjectImage instance
|
||||
from the ObjectBuffer it received. ObjectImage, which wraps the
|
||||
ObjectFile class, is a helper class which parses the binary object image
|
||||
and provides access to the information contained in the format-specific
|
||||
headers, including section, symbol and relocation information.
|
||||
|
||||
RuntimeDyldImpl::loadObject then iterates through the symbols in the
|
||||
image. Information about common symbols is collected for later use. For
|
||||
each function or data symbol, the associated section is loaded into memory
|
||||
and the symbol is stored in a symbol table map data structure. When the
|
||||
iteration is complete, a section is emitted for the common symbols.
|
||||
|
||||
Next, RuntimeDyldImpl::loadObject iterates through the sections in the
|
||||
object image and for each section iterates through the relocations for
|
||||
that section. For each relocation, it calls the format-specific
|
||||
processRelocationRef method, which will examine the relocation and store
|
||||
it in one of two data structures, a section-based relocation list map and
|
||||
an external symbol relocation map.
|
||||
|
||||
.. image:: MCJIT-load-object.png
|
||||
|
||||
When RuntimeDyldImpl::loadObject returns, all of the code and data
|
||||
sections for the object will have been loaded into memory allocated by the
|
||||
memory manager and relocation information will have been prepared, but the
|
||||
relocations have not yet been applied and the generated code is still not
|
||||
ready to be executed.
|
||||
|
||||
[Currently (as of August 2013) the MCJIT engine will immediately apply
|
||||
relocations when loadObject completes. However, this shouldn't be
|
||||
happening. Because the code may have been generated for a remote target,
|
||||
the client should be given a chance to re-map the section addresses before
|
||||
relocations are applied. It is possible to apply relocations multiple
|
||||
times, but in the case where addresses are to be re-mapped, this first
|
||||
application is wasted effort.]
|
||||
|
||||
Address Remapping
|
||||
=================
|
||||
|
||||
At any time after initial code has been generated and before
|
||||
finalizeObject is called, the client can remap the address of sections in
|
||||
the object. Typically this is done because the code was generated for an
|
||||
external process and is being mapped into that process' address space.
|
||||
The client remaps the section address by calling MCJIT::mapSectionAddress.
|
||||
This should happen before the section memory is copied to its new
|
||||
location.
|
||||
|
||||
When MCJIT::mapSectionAddress is called, MCJIT passes the call on to
|
||||
RuntimeDyldImpl (via its Dyld member). RuntimeDyldImpl stores the new
|
||||
address in an internal data structure but does not update the code at this
|
||||
time, since other sections are likely to change.
|
||||
|
||||
When the client is finished remapping section addresses, it will call
|
||||
MCJIT::finalizeObject to complete the remapping process.
|
||||
|
||||
Final Preparations
|
||||
==================
|
||||
|
||||
When MCJIT::finalizeObject is called, MCJIT calls
|
||||
RuntimeDyld::resolveRelocations. This function will attempt to locate any
|
||||
external symbols and then apply all relocations for the object.
|
||||
|
||||
External symbols are resolved by calling the memory manager's
|
||||
getPointerToNamedFunction method. The memory manager will return the
|
||||
address of the requested symbol in the target address space. (Note, this
|
||||
may not be a valid pointer in the host process.) RuntimeDyld will then
|
||||
iterate through the list of relocations it has stored which are associated
|
||||
with this symbol and invoke the resolveRelocation method which, through a
|
||||
format-specific implementation, will apply the relocation to the loaded
|
||||
section memory.
|
||||
|
||||
Next, RuntimeDyld::resolveRelocations iterates through the list of
|
||||
sections and for each section iterates through a list of relocations that
|
||||
have been saved which reference that symbol and call resolveRelocation for
|
||||
each entry in this list. The relocation list here is a list of
|
||||
relocations for which the symbol associated with the relocation is located
|
||||
in the section associated with the list. Each of these relocations will
|
||||
have a target location at which the relocation will be applied that is
|
||||
likely located in a different section.
|
||||
|
||||
.. image:: MCJIT-resolve-relocations.png
|
||||
|
||||
Once relocations have been applied as described above, MCJIT calls
|
||||
RuntimeDyld::getEHFrameSection, and if a non-zero result is returned
|
||||
passes the section data to the memory manager's registerEHFrames method.
|
||||
This allows the memory manager to call any desired target-specific
|
||||
functions, such as registering the EH frame information with a debugger.
|
||||
|
||||
Finally, MCJIT calls the memory manager's finalizeMemory method. In this
|
||||
method, the memory manager will invalidate the target code cache, if
|
||||
necessary, and apply final permissions to the memory pages it has
|
||||
allocated for code and data memory.
|
||||
|
||||
|
|
|
|||
|
|
@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
|
|||
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
|
||||
bool LookThroughBitCast = false);
|
||||
|
||||
/// \brief Tests if a value is a call or invoke to a library function that
|
||||
/// allocates memory and never returns null (such as operator new).
|
||||
bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
|
||||
bool LookThroughBitCast = false);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// malloc Call Utility Functions.
|
||||
//
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ private:
|
|||
// of memory operands required to be precise exceeds the maximum value of
|
||||
// NumMemRefs - currently 256 - we remove the operands entirely. Note also
|
||||
// that this is a non-owning reference to a shared copy on write buffer owned
|
||||
// by the MachineFunction and created via MF.allocateMemRefsArray.
|
||||
// by the MachineFunction and created via MF.allocateMemRefsArray.
|
||||
mmo_iterator MemRefs;
|
||||
|
||||
DebugLoc debugLoc; // Source line information.
|
||||
|
|
@ -354,7 +354,7 @@ public:
|
|||
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
|
||||
/// Return true if we don't have any memory operands which describe the
|
||||
/// memory access done by this instruction. If this is true, calling code
|
||||
/// must be conservative.
|
||||
/// must be conservative.
|
||||
bool memoperands_empty() const { return NumMemRefs == 0; }
|
||||
|
||||
iterator_range<mmo_iterator> memoperands() {
|
||||
|
|
@ -774,7 +774,7 @@ public:
|
|||
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
|
||||
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
|
||||
bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
|
||||
bool isMSInlineAsm() const {
|
||||
bool isMSInlineAsm() const {
|
||||
return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
|
||||
}
|
||||
bool isStackAligningInlineAsm() const;
|
||||
|
|
@ -1180,11 +1180,26 @@ public:
|
|||
/// Assign this MachineInstr's memory reference descriptor list.
|
||||
/// This does not transfer ownership.
|
||||
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
|
||||
MemRefs = NewMemRefs;
|
||||
NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs);
|
||||
assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
|
||||
setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
|
||||
}
|
||||
|
||||
/// Assign this MachineInstr's memory reference descriptor list. First
|
||||
/// element in the pair is the begin iterator/pointer to the array; the
|
||||
/// second is the number of MemoryOperands. This does not transfer ownership
|
||||
/// of the underlying memory.
|
||||
void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
|
||||
MemRefs = NewMemRefs.first;
|
||||
NumMemRefs = uint8_t(NewMemRefs.second);
|
||||
assert(NumMemRefs == NewMemRefs.second &&
|
||||
"Too many memrefs - must drop memory operands");
|
||||
}
|
||||
|
||||
/// Return a set of memrefs (begin iterator, size) which conservatively
|
||||
/// describe the memory behavior of both MachineInstrs. This is appropriate
|
||||
/// for use when merging two MachineInstrs into one. This routine does not
|
||||
/// modify the memrefs of this MachineInstr.
|
||||
std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
|
||||
|
||||
/// Clear this MachineInstr's memory reference descriptor list. This resets
|
||||
/// the memrefs to their most conservative state. This should be used only
|
||||
/// as a last resort since it greatly pessimizes our knowledge of the memory
|
||||
|
|
|
|||
|
|
@ -162,6 +162,11 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
|
||||
unsigned> MemOperandsRef) const {
|
||||
MI->setMemRefs(MemOperandsRef);
|
||||
return *this;
|
||||
}
|
||||
|
||||
const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
|
||||
MI->addOperand(*MF, MO);
|
||||
|
|
|
|||
|
|
@ -178,7 +178,7 @@ public:
|
|||
/// register.
|
||||
bool FullyDefined;
|
||||
|
||||
/// Reg or ont of its aliases is read. The register may only be read
|
||||
/// Reg or one of its aliases is read. The register may only be read
|
||||
/// partially.
|
||||
bool Read;
|
||||
/// Reg or a super-register is read. The full register is read.
|
||||
|
|
|
|||
|
|
@ -83,7 +83,9 @@ enum class ClrHandlerType { Catch, Finally, Fault, Filter };
|
|||
struct ClrEHUnwindMapEntry {
|
||||
MBBOrBasicBlock Handler;
|
||||
uint32_t TypeToken;
|
||||
int Parent;
|
||||
int HandlerParentState; ///< Outer handler enclosing this entry's handler
|
||||
int TryParentState; ///< Outer try region enclosing this entry's try region,
|
||||
///< treating later catches on same try as "outer"
|
||||
ClrHandlerType HandlerType;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -310,6 +310,11 @@ public:
|
|||
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
|
||||
}
|
||||
|
||||
/// \brief Return true if this function has the given attribute.
|
||||
bool hasFnAttr(StringRef A) const {
|
||||
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
|
||||
}
|
||||
|
||||
/// \brief Return true if the call or the callee has the given attribute.
|
||||
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
|
||||
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
|
||||
|
|
|
|||
|
|
@ -61,9 +61,13 @@ protected:
|
|||
MDNode *DefaultFPMathTag;
|
||||
FastMathFlags FMF;
|
||||
|
||||
ArrayRef<OperandBundleDef> DefaultOperandBundles;
|
||||
|
||||
public:
|
||||
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr)
|
||||
: Context(context), DefaultFPMathTag(FPMathTag), FMF() {
|
||||
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: Context(context), DefaultFPMathTag(FPMathTag), FMF(),
|
||||
DefaultOperandBundles(OpBundles) {
|
||||
ClearInsertionPoint();
|
||||
}
|
||||
|
||||
|
|
@ -538,37 +542,44 @@ class IRBuilder : public IRBuilderBase, public Inserter {
|
|||
|
||||
public:
|
||||
IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(),
|
||||
MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {}
|
||||
MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(C, FPMathTag, OpBundles), Inserter(std::move(I)),
|
||||
Folder(F) {}
|
||||
|
||||
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(C, FPMathTag), Folder() {
|
||||
}
|
||||
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(C, FPMathTag, OpBundles), Folder() {}
|
||||
|
||||
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
|
||||
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
|
||||
SetInsertPoint(TheBB);
|
||||
}
|
||||
|
||||
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
|
||||
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
|
||||
SetInsertPoint(TheBB);
|
||||
}
|
||||
|
||||
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(IP->getContext(), FPMathTag), Folder() {
|
||||
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() {
|
||||
SetInsertPoint(IP);
|
||||
}
|
||||
|
||||
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
|
||||
MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
|
||||
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T &F,
|
||||
MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
|
||||
SetInsertPoint(TheBB, IP);
|
||||
}
|
||||
|
||||
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP,
|
||||
MDNode *FPMathTag = nullptr)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
|
||||
MDNode *FPMathTag = nullptr,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None)
|
||||
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
|
||||
SetInsertPoint(TheBB, IP);
|
||||
}
|
||||
|
||||
|
|
@ -1529,8 +1540,11 @@ public:
|
|||
|
||||
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
|
||||
ArrayRef<OperandBundleDef> OpBundles = None,
|
||||
const Twine &Name = "") {
|
||||
return Insert(CallInst::Create(Callee, Args, OpBundles), Name);
|
||||
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
|
||||
CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
|
||||
if (isa<FPMathOperator>(CI))
|
||||
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
|
||||
return Insert(CI, Name);
|
||||
}
|
||||
|
||||
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
|
||||
|
|
@ -1543,7 +1557,7 @@ public:
|
|||
CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
|
||||
ArrayRef<Value *> Args, const Twine &Name = "",
|
||||
MDNode *FPMathTag = nullptr) {
|
||||
CallInst *CI = CallInst::Create(FTy, Callee, Args);
|
||||
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
|
||||
if (isa<FPMathOperator>(CI))
|
||||
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
|
||||
return Insert(CI, Name);
|
||||
|
|
|
|||
|
|
@ -3550,6 +3550,11 @@ public:
|
|||
return hasFnAttrImpl(A);
|
||||
}
|
||||
|
||||
/// \brief Determine whether this call has the given attribute.
|
||||
bool hasFnAttr(StringRef A) const {
|
||||
return hasFnAttrImpl(A);
|
||||
}
|
||||
|
||||
/// \brief Determine whether the call or the callee has the given attributes.
|
||||
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
|
||||
|
||||
|
|
@ -3734,7 +3739,19 @@ private:
|
|||
unsigned getNumSuccessorsV() const override;
|
||||
void setSuccessorV(unsigned idx, BasicBlock *B) override;
|
||||
|
||||
bool hasFnAttrImpl(Attribute::AttrKind A) const;
|
||||
template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
|
||||
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
|
||||
return true;
|
||||
|
||||
// Operand bundles override attributes on the called function, but don't
|
||||
// override attributes directly present on the invoke instruction.
|
||||
if (isFnAttrDisallowedByOpBundle(A))
|
||||
return false;
|
||||
|
||||
if (const Function *F = getCalledFunction())
|
||||
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Shadow Instruction::setInstructionSubclassData with a private forwarding
|
||||
// method so that subclasses cannot accidentally use it.
|
||||
|
|
@ -3966,6 +3983,8 @@ public:
|
|||
/// point to the added handler.
|
||||
void addHandler(BasicBlock *Dest);
|
||||
|
||||
void removeHandler(handler_iterator HI);
|
||||
|
||||
unsigned getNumSuccessors() const { return getNumOperands() - 1; }
|
||||
BasicBlock *getSuccessor(unsigned Idx) const {
|
||||
assert(Idx < getNumSuccessors() &&
|
||||
|
|
|
|||
|
|
@ -32,6 +32,19 @@ let TargetPrefix = "x86" in {
|
|||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FLAGS.
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
|
||||
Intrinsic<[llvm_i32_ty], [], []>;
|
||||
def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
|
||||
Intrinsic<[llvm_i64_ty], [], []>;
|
||||
def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
|
||||
Intrinsic<[], [llvm_i32_ty], []>;
|
||||
def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
|
||||
Intrinsic<[], [llvm_i64_ty], []>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Read Time Stamp Counter.
|
||||
let TargetPrefix = "x86" in {
|
||||
|
|
@ -2211,6 +2224,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
|
|
@ -2229,6 +2261,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
|
||||
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
|
||||
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
|
||||
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty,
|
||||
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Pack ops.
|
||||
|
|
@ -2696,6 +2791,59 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Gather ops
|
||||
|
|
@ -3919,9 +4067,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
// Support protection key
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i32_ty], [], []>;
|
||||
def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
|
||||
Intrinsic<[], [llvm_i32_ty], []>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Half float conversion
|
||||
|
|
|
|||
|
|
@ -283,14 +283,20 @@ private:
|
|||
LLVMContext &Context;
|
||||
uint64_t NextIndex;
|
||||
SmallDenseMap<void *, std::pair<OwnerTy, uint64_t>, 4> UseMap;
|
||||
/// Flag that can be set to false if this metadata should not be
|
||||
/// RAUW'ed, e.g. if it is used as the key of a map.
|
||||
bool CanReplace;
|
||||
|
||||
public:
|
||||
ReplaceableMetadataImpl(LLVMContext &Context)
|
||||
: Context(Context), NextIndex(0) {}
|
||||
: Context(Context), NextIndex(0), CanReplace(true) {}
|
||||
~ReplaceableMetadataImpl() {
|
||||
assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata");
|
||||
}
|
||||
|
||||
/// Set the CanReplace flag to the given value.
|
||||
void setCanReplace(bool Replaceable) { CanReplace = Replaceable; }
|
||||
|
||||
LLVMContext &getContext() const { return Context; }
|
||||
|
||||
/// \brief Replace all uses of this with MD.
|
||||
|
|
@ -901,14 +907,19 @@ public:
|
|||
Context.getReplaceableUses()->replaceAllUsesWith(MD);
|
||||
}
|
||||
|
||||
/// Set the CanReplace flag to the given value.
|
||||
void setCanReplace(bool Replaceable) {
|
||||
Context.getReplaceableUses()->setCanReplace(Replaceable);
|
||||
}
|
||||
|
||||
/// \brief Resolve cycles.
|
||||
///
|
||||
/// Once all forward declarations have been resolved, force cycles to be
|
||||
/// resolved. If \p MDMaterialized is true, then any temporary metadata
|
||||
/// resolved. If \p AllowTemps is true, then any temporary metadata
|
||||
/// is ignored, otherwise it asserts when encountering temporary metadata.
|
||||
///
|
||||
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
|
||||
void resolveCycles(bool MDMaterialized = true);
|
||||
void resolveCycles(bool AllowTemps = false);
|
||||
|
||||
/// \brief Replace a temporary node with a permanent one.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
|
|
@ -36,14 +37,13 @@ enum class StatepointFlags {
|
|||
MaskAll = GCTransition ///< A bitmask that includes all valid flags.
|
||||
};
|
||||
|
||||
class GCRelocateOperands;
|
||||
class GCRelocateInst;
|
||||
class ImmutableStatepoint;
|
||||
|
||||
bool isStatepoint(const ImmutableCallSite &CS);
|
||||
bool isStatepoint(const Value *V);
|
||||
bool isStatepoint(const Value &V);
|
||||
|
||||
bool isGCRelocate(const Value *V);
|
||||
bool isGCRelocate(const ImmutableCallSite &CS);
|
||||
|
||||
bool isGCResult(const Value *V);
|
||||
|
|
@ -247,7 +247,7 @@ public:
|
|||
/// May contain several relocations for the same base/derived pair.
|
||||
/// For example this could happen due to relocations on unwinding
|
||||
/// path of invoke.
|
||||
std::vector<GCRelocateOperands> getRelocates() const;
|
||||
std::vector<const GCRelocateInst *> getRelocates() const;
|
||||
|
||||
/// Get the experimental_gc_result call tied to this statepoint. Can be
|
||||
/// nullptr if there isn't a gc_result tied to this statepoint. Guaranteed to
|
||||
|
|
@ -305,33 +305,27 @@ public:
|
|||
explicit Statepoint(CallSite CS) : Base(CS) {}
|
||||
};
|
||||
|
||||
/// Wraps a call to a gc.relocate and provides access to it's operands.
|
||||
/// TODO: This should likely be refactored to resememble the wrappers in
|
||||
/// InstrinsicInst.h.
|
||||
class GCRelocateOperands {
|
||||
ImmutableCallSite RelocateCS;
|
||||
|
||||
/// This represents the gc.relocate intrinsic.
|
||||
class GCRelocateInst : public IntrinsicInst {
|
||||
public:
|
||||
GCRelocateOperands(const User *U) : RelocateCS(U) { assert(isGCRelocate(U)); }
|
||||
GCRelocateOperands(const Instruction *inst) : RelocateCS(inst) {
|
||||
assert(isGCRelocate(inst));
|
||||
static inline bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
|
||||
}
|
||||
static inline bool classof(const Value *V) {
|
||||
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
}
|
||||
GCRelocateOperands(CallSite CS) : RelocateCS(CS) { assert(isGCRelocate(CS)); }
|
||||
|
||||
/// Return true if this relocate is tied to the invoke statepoint.
|
||||
/// This includes relocates which are on the unwinding path.
|
||||
bool isTiedToInvoke() const {
|
||||
const Value *Token = RelocateCS.getArgument(0);
|
||||
const Value *Token = getArgOperand(0);
|
||||
|
||||
return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token);
|
||||
}
|
||||
|
||||
/// Get enclosed relocate intrinsic
|
||||
ImmutableCallSite getUnderlyingCallSite() { return RelocateCS; }
|
||||
|
||||
/// The statepoint with which this gc.relocate is associated.
|
||||
const Instruction *getStatepoint() {
|
||||
const Value *Token = RelocateCS.getArgument(0);
|
||||
const Instruction *getStatepoint() const {
|
||||
const Value *Token = getArgOperand(0);
|
||||
|
||||
// This takes care both of relocates for call statepoints and relocates
|
||||
// on normal path of invoke statepoint.
|
||||
|
|
@ -354,22 +348,22 @@ public:
|
|||
/// The index into the associated statepoint's argument list
|
||||
/// which contains the base pointer of the pointer whose
|
||||
/// relocation this gc.relocate describes.
|
||||
unsigned getBasePtrIndex() {
|
||||
return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
|
||||
unsigned getBasePtrIndex() const {
|
||||
return cast<ConstantInt>(getArgOperand(1))->getZExtValue();
|
||||
}
|
||||
|
||||
/// The index into the associated statepoint's argument list which
|
||||
/// contains the pointer whose relocation this gc.relocate describes.
|
||||
unsigned getDerivedPtrIndex() {
|
||||
return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
|
||||
unsigned getDerivedPtrIndex() const {
|
||||
return cast<ConstantInt>(getArgOperand(2))->getZExtValue();
|
||||
}
|
||||
|
||||
Value *getBasePtr() {
|
||||
Value *getBasePtr() const {
|
||||
ImmutableCallSite CS(getStatepoint());
|
||||
return *(CS.arg_begin() + getBasePtrIndex());
|
||||
}
|
||||
|
||||
Value *getDerivedPtr() {
|
||||
Value *getDerivedPtr() const {
|
||||
ImmutableCallSite CS(getStatepoint());
|
||||
return *(CS.arg_begin() + getDerivedPtrIndex());
|
||||
}
|
||||
|
|
@ -377,11 +371,11 @@ public:
|
|||
|
||||
template <typename FunTy, typename InstructionTy, typename ValueTy,
|
||||
typename CallSiteTy>
|
||||
std::vector<GCRelocateOperands>
|
||||
std::vector<const GCRelocateInst *>
|
||||
StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
|
||||
const {
|
||||
|
||||
std::vector<GCRelocateOperands> Result;
|
||||
std::vector<const GCRelocateInst *> Result;
|
||||
|
||||
CallSiteTy StatepointCS = getCallSite();
|
||||
|
||||
|
|
@ -389,8 +383,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
|
|||
// gc_relocates ensures that we only get pairs which are actually relocated
|
||||
// and used after the statepoint.
|
||||
for (const User *U : getInstruction()->users())
|
||||
if (isGCRelocate(U))
|
||||
Result.push_back(GCRelocateOperands(U));
|
||||
if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
|
||||
Result.push_back(Relocate);
|
||||
|
||||
if (!StatepointCS.isInvoke())
|
||||
return Result;
|
||||
|
|
@ -401,8 +395,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
|
|||
|
||||
// Search for gc relocates that are attached to this landingpad.
|
||||
for (const User *LandingPadUser : LandingPad->users()) {
|
||||
if (isGCRelocate(LandingPadUser))
|
||||
Result.push_back(GCRelocateOperands(LandingPadUser));
|
||||
if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
|
||||
Result.push_back(Relocate);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ public:
|
|||
FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {}
|
||||
|
||||
FeatureBitset(std::initializer_list<unsigned> Init) : bitset() {
|
||||
for (auto I = Init.begin() , E = Init.end(); I != E; ++I)
|
||||
set(*I);
|
||||
for (auto I : Init)
|
||||
set(I);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -59,6 +59,11 @@ struct SubtargetFeatureKV {
|
|||
bool operator<(StringRef S) const {
|
||||
return StringRef(Key) < S;
|
||||
}
|
||||
|
||||
// Compare routine for std::is_sorted.
|
||||
bool operator<(const SubtargetFeatureKV &Other) const {
|
||||
return StringRef(Key) < StringRef(Other.Key);
|
||||
}
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
@ -98,14 +103,13 @@ public:
|
|||
/// Adding Features.
|
||||
void AddFeature(StringRef String, bool Enable = true);
|
||||
|
||||
/// ToggleFeature - Toggle a feature and returns the newly updated feature
|
||||
/// bits.
|
||||
FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable);
|
||||
/// ToggleFeature - Toggle a feature and update the feature bits.
|
||||
static void ToggleFeature(FeatureBitset &Bits, StringRef String,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable);
|
||||
|
||||
/// Apply the feature flag and return the newly updated feature bits.
|
||||
FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable);
|
||||
/// Apply the feature flag and update the feature bits.
|
||||
static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable);
|
||||
|
||||
/// Get feature bits of a CPU.
|
||||
FeatureBitset getFeatureBits(StringRef CPU,
|
||||
|
|
|
|||
|
|
@ -155,11 +155,36 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName);
|
|||
GlobalVariable *createPGOFuncNameVar(Module &M,
|
||||
GlobalValue::LinkageTypes Linkage,
|
||||
StringRef FuncName);
|
||||
/// Return the initializer of the PGO name var \c NameVar as a string.
|
||||
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
|
||||
|
||||
/// Given a PGO function name, remove the filename prefix and return
|
||||
/// the original (static) function name.
|
||||
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
|
||||
|
||||
/// Given a vector of strings (function PGO names) \c NameStrs, the
|
||||
/// method generates a combined string \c Result that is ready to be
|
||||
/// serialized. The \c Result string is comprised of three fields:
|
||||
/// The first field is the length of the uncompressed strings, and
|
||||
/// the second field is the length of the zlib-compressed string.
|
||||
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
|
||||
/// third field is the uncompressed strings; otherwise it is the
|
||||
/// compressed string. When the string compression is off, the
|
||||
/// second field will have value zero.
|
||||
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
|
||||
bool doCompression, std::string &Result);
|
||||
/// Produce \c Result string with the same format described above. The input
|
||||
/// is a vector of PGO function name variables that are referenced.
|
||||
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
|
||||
std::string &Result);
|
||||
class InstrProfSymtab;
|
||||
/// \c NameStrings is a string composed of one or more sub-strings encoded in
|
||||
/// the
|
||||
/// format described above. The substrings are separated by 0 or more zero
|
||||
/// bytes.
|
||||
/// This method decodes the string and populates the \c Symtab.
|
||||
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
|
||||
|
||||
const std::error_category &instrprof_category();
|
||||
|
||||
enum class instrprof_error {
|
||||
|
|
@ -235,6 +260,11 @@ public:
|
|||
/// This interface is used by reader of CoverageMapping test
|
||||
/// format.
|
||||
inline std::error_code create(StringRef D, uint64_t BaseAddr);
|
||||
/// \c NameStrings is a string composed of one or more sub-strings
|
||||
/// encoded in the format described above. The substrings are
|
||||
/// separated by 0 or more zero bytes. This method decodes the
|
||||
/// string and populates the \c Symtab.
|
||||
inline std::error_code create(StringRef NameStrings);
|
||||
/// Create InstrProfSymtab from a set of names iterable from
|
||||
/// \p IterRange. This interface is used by IndexedProfReader.
|
||||
template <typename NameIterRange> void create(const NameIterRange &IterRange);
|
||||
|
|
@ -255,8 +285,8 @@ public:
|
|||
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
|
||||
}
|
||||
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
|
||||
/// Return function's PGO name from the function name's symabol
|
||||
/// address in the object file. If an error occurs, Return
|
||||
/// Return function's PGO name from the function name's symbol
|
||||
/// address in the object file. If an error occurs, return
|
||||
/// an empty string.
|
||||
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
|
||||
/// Return function's PGO name from the name's md5 hash value.
|
||||
|
|
@ -270,6 +300,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
|
|||
return std::error_code();
|
||||
}
|
||||
|
||||
std::error_code InstrProfSymtab::create(StringRef NameStrings) {
|
||||
if (readPGOFuncNameStrings(NameStrings, *this))
|
||||
return make_error_code(instrprof_error::malformed);
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
template <typename NameIterRange>
|
||||
void InstrProfSymtab::create(const NameIterRange &IterRange) {
|
||||
for (auto Name : IterRange)
|
||||
|
|
@ -576,8 +612,14 @@ template <class IntPtrT> struct CovMapFunctionRecord {
|
|||
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
|
||||
#include "llvm/ProfileData/InstrProfData.inc"
|
||||
};
|
||||
LLVM_PACKED_END
|
||||
// Per module coverage mapping data header, i.e. CoverageMapFileHeader
|
||||
// documented above.
|
||||
struct CovMapHeader {
|
||||
#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
|
||||
#include "llvm/ProfileData/InstrProfData.inc"
|
||||
};
|
||||
|
||||
LLVM_PACKED_END
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\
|
||||
/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
|
||||
|*
|
||||
|* The LLVM Compiler Infrastructure
|
||||
|*
|
||||
|
|
@ -167,6 +167,25 @@ COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
|
|||
#undef COVMAP_FUNC_RECORD
|
||||
/* COVMAP_FUNC_RECORD end. */
|
||||
|
||||
/* COVMAP_HEADER start */
|
||||
/* Definition of member fields of coverage map header.
|
||||
*/
|
||||
#ifndef COVMAP_HEADER
|
||||
#define COVMAP_HEADER(Type, LLVMType, Name, Initializer)
|
||||
#else
|
||||
#define INSTR_PROF_DATA_DEFINED
|
||||
#endif
|
||||
COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \
|
||||
llvm::ConstantInt::get(Int32Ty, FunctionRecords.size()))
|
||||
COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \
|
||||
llvm::ConstantInt::get(Int32Ty, FilenamesSize))
|
||||
COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \
|
||||
llvm::ConstantInt::get(Int32Ty, CoverageMappingSize))
|
||||
COVMAP_HEADER(uint32_t, Int32Ty, Version, \
|
||||
llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1))
|
||||
#undef COVMAP_HEADER
|
||||
/* COVMAP_HEADER end. */
|
||||
|
||||
|
||||
#ifdef INSTR_PROF_VALUE_PROF_DATA
|
||||
#define INSTR_PROF_DATA_DEFINED
|
||||
|
|
|
|||
|
|
@ -213,6 +213,7 @@ ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC)
|
|||
ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
|
||||
ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
|
||||
ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
|
||||
ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
|
||||
// Non-standard Arch names.
|
||||
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE)
|
||||
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE)
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ struct ProcessInfo {
|
|||
|
||||
/// Return true if the given arguments fit within system-specific
|
||||
/// argument length limits.
|
||||
bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
|
||||
bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args);
|
||||
|
||||
/// File encoding options when writing contents that a non-UTF8 tool will
|
||||
/// read (on Windows systems). For UNIX, we always use UTF-8.
|
||||
|
|
|
|||
|
|
@ -305,7 +305,7 @@ private:
|
|||
/// increment() which must set CurrentEntry to 0 to create an end iterator.
|
||||
template <class BaseT, class ValueT>
|
||||
class basic_collection_iterator
|
||||
: public std::iterator<std::forward_iterator_tag, ValueT> {
|
||||
: public std::iterator<std::input_iterator_tag, ValueT> {
|
||||
public:
|
||||
basic_collection_iterator() : Base(nullptr) {}
|
||||
basic_collection_iterator(BaseT *B) : Base(B) {}
|
||||
|
|
@ -326,11 +326,24 @@ public:
|
|||
return Base->CurrentEntry;
|
||||
}
|
||||
|
||||
/// Note on EqualityComparable:
|
||||
///
|
||||
/// The iterator is not re-entrant,
|
||||
/// it is meant to be used for parsing YAML on-demand.
|
||||
/// Once iteration has started, it can point to only one entry at a time;
|
||||
/// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
|
||||
/// iff Base and Other.Base are equal.
|
||||
bool operator==(const basic_collection_iterator &Other) const {
|
||||
if (Base && (Base == Other.Base)) {
|
||||
assert((Base->CurrentEntry == Other.Base->CurrentEntry)
|
||||
&& "Equal Bases expected to point to equal Entries");
|
||||
}
|
||||
|
||||
return Base == Other.Base;
|
||||
}
|
||||
|
||||
bool operator!=(const basic_collection_iterator &Other) const {
|
||||
if (Base != Other.Base)
|
||||
return true;
|
||||
return (Base && Other.Base) &&
|
||||
Base->CurrentEntry != Other.Base->CurrentEntry;
|
||||
return !(Base == Other.Base);
|
||||
}
|
||||
|
||||
basic_collection_iterator &operator++() {
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ protected:
|
|||
/// We could pack these a bit tighter by not having the IK_FirstXXXInit
|
||||
/// and IK_LastXXXInit be their own values, but that would degrade
|
||||
/// readability for really no benefit.
|
||||
enum InitKind {
|
||||
enum InitKind : uint8_t {
|
||||
IK_BitInit,
|
||||
IK_FirstTypedInit,
|
||||
IK_BitsInit,
|
||||
|
|
@ -256,6 +256,9 @@ protected:
|
|||
|
||||
private:
|
||||
const InitKind Kind;
|
||||
protected:
|
||||
uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit
|
||||
private:
|
||||
Init(const Init &) = delete;
|
||||
Init &operator=(const Init &) = delete;
|
||||
virtual void anchor();
|
||||
|
|
@ -264,7 +267,7 @@ public:
|
|||
InitKind getKind() const { return Kind; }
|
||||
|
||||
protected:
|
||||
explicit Init(InitKind K) : Kind(K) {}
|
||||
explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
|
||||
|
||||
public:
|
||||
virtual ~Init() {}
|
||||
|
|
@ -365,7 +368,8 @@ class TypedInit : public Init {
|
|||
TypedInit &operator=(const TypedInit &Other) = delete;
|
||||
|
||||
protected:
|
||||
explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
|
||||
explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
|
||||
: Init(K, Opc), Ty(T) {}
|
||||
~TypedInit() override {
|
||||
// If this is a DefInit we need to delete the RecordRecTy.
|
||||
if (getKind() == IK_DefInit)
|
||||
|
|
@ -650,7 +654,8 @@ class OpInit : public TypedInit {
|
|||
OpInit &operator=(OpInit &Other) = delete;
|
||||
|
||||
protected:
|
||||
explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {}
|
||||
explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc)
|
||||
: TypedInit(K, Type, Opc) {}
|
||||
|
||||
public:
|
||||
static bool classof(const Init *I) {
|
||||
|
|
@ -677,14 +682,13 @@ public:
|
|||
///
|
||||
class UnOpInit : public OpInit {
|
||||
public:
|
||||
enum UnaryOp { CAST, HEAD, TAIL, EMPTY };
|
||||
enum UnaryOp : uint8_t { CAST, HEAD, TAIL, EMPTY };
|
||||
|
||||
private:
|
||||
UnaryOp Opc;
|
||||
Init *LHS;
|
||||
|
||||
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
|
||||
: OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {}
|
||||
: OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {}
|
||||
|
||||
UnOpInit(const UnOpInit &Other) = delete;
|
||||
UnOpInit &operator=(const UnOpInit &Other) = delete;
|
||||
|
|
@ -708,7 +712,7 @@ public:
|
|||
return getOperand();
|
||||
}
|
||||
|
||||
UnaryOp getOpcode() const { return Opc; }
|
||||
UnaryOp getOpcode() const { return (UnaryOp)Opc; }
|
||||
Init *getOperand() const { return LHS; }
|
||||
|
||||
// Fold - If possible, fold this to a simpler init. Return this if not
|
||||
|
|
@ -724,14 +728,14 @@ public:
|
|||
///
|
||||
class BinOpInit : public OpInit {
|
||||
public:
|
||||
enum BinaryOp { ADD, AND, SHL, SRA, SRL, LISTCONCAT, STRCONCAT, CONCAT, EQ };
|
||||
enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT,
|
||||
STRCONCAT, CONCAT, EQ };
|
||||
|
||||
private:
|
||||
BinaryOp Opc;
|
||||
Init *LHS, *RHS;
|
||||
|
||||
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
|
||||
OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {}
|
||||
OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {}
|
||||
|
||||
BinOpInit(const BinOpInit &Other) = delete;
|
||||
BinOpInit &operator=(const BinOpInit &Other) = delete;
|
||||
|
|
@ -759,7 +763,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
BinaryOp getOpcode() const { return Opc; }
|
||||
BinaryOp getOpcode() const { return (BinaryOp)Opc; }
|
||||
Init *getLHS() const { return LHS; }
|
||||
Init *getRHS() const { return RHS; }
|
||||
|
||||
|
|
@ -776,15 +780,14 @@ public:
|
|||
///
|
||||
class TernOpInit : public OpInit {
|
||||
public:
|
||||
enum TernaryOp { SUBST, FOREACH, IF };
|
||||
enum TernaryOp : uint8_t { SUBST, FOREACH, IF };
|
||||
|
||||
private:
|
||||
TernaryOp Opc;
|
||||
Init *LHS, *MHS, *RHS;
|
||||
|
||||
TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
|
||||
RecTy *Type) :
|
||||
OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
|
||||
OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
|
||||
|
||||
TernOpInit(const TernOpInit &Other) = delete;
|
||||
TernOpInit &operator=(const TernOpInit &Other) = delete;
|
||||
|
|
@ -815,7 +818,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
TernaryOp getOpcode() const { return Opc; }
|
||||
TernaryOp getOpcode() const { return (TernaryOp)Opc; }
|
||||
Init *getLHS() const { return LHS; }
|
||||
Init *getMHS() const { return MHS; }
|
||||
Init *getRHS() const { return RHS; }
|
||||
|
|
|
|||
|
|
@ -936,6 +936,10 @@ class AsmParser {
|
|||
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
|
||||
// written register name matcher
|
||||
bit ShouldEmitMatchRegisterName = 1;
|
||||
|
||||
// HasMnemonicFirst - Set to false if target instructions don't always
|
||||
// start with a mnemonic as the first token.
|
||||
bit HasMnemonicFirst = 1;
|
||||
}
|
||||
def DefaultAsmParser : AsmParser;
|
||||
|
||||
|
|
|
|||
|
|
@ -2269,6 +2269,12 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Return true if the MachineFunction contains a COPY which would imply
|
||||
/// HasOpaqueSPAdjustment.
|
||||
virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Perform necessary initialization to handle a subset of CSRs explicitly
|
||||
/// via copies. This function is called at the beginning of instruction
|
||||
/// selection.
|
||||
|
|
|
|||
|
|
@ -23,11 +23,13 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
/// This optimization identifies DIV instructions that can be
|
||||
/// This optimization identifies DIV instructions in a BB that can be
|
||||
/// profitably bypassed and carried out with a shorter, faster divide.
|
||||
bool bypassSlowDivision(Function &F,
|
||||
Function::iterator &I,
|
||||
const DenseMap<unsigned int, unsigned int> &BypassWidth);
|
||||
///
|
||||
/// This optimization may add basic blocks immediately after BB; for obvious
|
||||
/// reasons, you shouldn't pass those blocks to bypassSlowDivision.
|
||||
bool bypassSlowDivision(
|
||||
BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
|
||||
|
|
@ -39,6 +40,8 @@ struct LICMSafetyInfo {
|
|||
bool MayThrow; // The current loop contains an instruction which
|
||||
// may throw.
|
||||
bool HeaderMayThrow; // Same as previous, but specific to loop header
|
||||
// Used to update funclet bundle operands.
|
||||
DenseMap<BasicBlock *, ColorVector> BlockColors;
|
||||
LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
|
||||
{}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -543,7 +543,6 @@ static bool isMemsetPattern16(const Function *MS,
|
|||
isa<IntegerType>(MemsetType->getParamType(2)))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -583,9 +582,6 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
|
|||
if (F->onlyAccessesArgMemory())
|
||||
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
|
||||
|
||||
if (isMemsetPattern16(F, TLI))
|
||||
Min = FMRB_OnlyAccessesArgumentPointees;
|
||||
|
||||
// Otherwise be conservative.
|
||||
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
|
||||
}
|
||||
|
|
@ -599,22 +595,21 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
|
|||
case Intrinsic::memset:
|
||||
case Intrinsic::memcpy:
|
||||
case Intrinsic::memmove:
|
||||
assert((ArgIdx == 0 || ArgIdx == 1) &&
|
||||
"Invalid argument index for memory intrinsic");
|
||||
return ArgIdx ? MRI_Ref : MRI_Mod;
|
||||
// We don't currently have a writeonly attribute. All other properties
|
||||
// of these intrinsics are nicely described via attributes in
|
||||
// Intrinsics.td and handled generically below.
|
||||
if (ArgIdx == 0)
|
||||
return MRI_Mod;
|
||||
}
|
||||
|
||||
// We can bound the aliasing properties of memset_pattern16 just as we can
|
||||
// for memcpy/memset. This is particularly important because the
|
||||
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
|
||||
// whenever possible.
|
||||
if (CS.getCalledFunction() &&
|
||||
isMemsetPattern16(CS.getCalledFunction(), TLI)) {
|
||||
assert((ArgIdx == 0 || ArgIdx == 1) &&
|
||||
"Invalid argument index for memset_pattern16");
|
||||
return ArgIdx ? MRI_Ref : MRI_Mod;
|
||||
}
|
||||
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
|
||||
// whenever possible. Note that all but the missing writeonly attribute are
|
||||
// handled via InferFunctionAttr.
|
||||
if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
|
||||
if (ArgIdx == 0)
|
||||
return MRI_Mod;
|
||||
|
||||
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
|
||||
return MRI_Ref;
|
||||
|
|
|
|||
|
|
@ -376,15 +376,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
|
|||
} else {
|
||||
return true; // Argument of an unknown call.
|
||||
}
|
||||
// If the Callee is not ReadNone, it may read the global,
|
||||
// and if it is not ReadOnly, it may also write to it.
|
||||
Function *CalleeF = CS.getCalledFunction();
|
||||
if (!CalleeF->doesNotAccessMemory()) {
|
||||
if (Readers)
|
||||
Readers->insert(CalleeF);
|
||||
if (Writers && !CalleeF->onlyReadsMemory())
|
||||
Writers->insert(CalleeF);
|
||||
}
|
||||
}
|
||||
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
|
||||
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
|
||||
|
|
@ -516,7 +507,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
|
|||
|
||||
if (F->isDeclaration()) {
|
||||
// Try to get mod/ref behaviour from function attributes.
|
||||
if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
|
||||
if (F->doesNotAccessMemory()) {
|
||||
// Can't do better than that!
|
||||
} else if (F->onlyReadsMemory()) {
|
||||
FI.addModRefInfo(MRI_Ref);
|
||||
|
|
@ -524,12 +515,6 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
|
|||
// This function might call back into the module and read a global -
|
||||
// consider every global as possibly being read by this function.
|
||||
FI.setMayReadAnyGlobal();
|
||||
} else if (F->onlyAccessesArgMemory() ||
|
||||
F->onlyAccessesInaccessibleMemOrArgMem()) {
|
||||
// This function may only access (read/write) memory pointed to by its
|
||||
// arguments. If this pointer is to a global, this escaping use of the
|
||||
// pointer is captured in AnalyzeUsesOfPointer().
|
||||
FI.addModRefInfo(MRI_ModRef);
|
||||
} else {
|
||||
FI.addModRefInfo(MRI_ModRef);
|
||||
// Can't say anything useful unless it's an intrinsic - they don't
|
||||
|
|
|
|||
|
|
@ -187,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
|
|||
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
|
||||
}
|
||||
|
||||
/// \brief Tests if a value is a call or invoke to a library function that
|
||||
/// allocates memory and never returns null (such as operator new).
|
||||
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
|
||||
bool LookThroughBitCast) {
|
||||
return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
|
||||
}
|
||||
|
||||
/// extractMallocCall - Returns the corresponding CallInst if the instruction
|
||||
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
|
||||
/// ignore InvokeInst here.
|
||||
|
|
|
|||
|
|
@ -477,7 +477,7 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
|
|||
// being 42. A key property of this program however is that if either
|
||||
// 1 or 4 were missing, there would be a race between the store of 42
|
||||
// and either the store of 0 or the load (making the whole program racy).
|
||||
// The paper mentionned above shows that the same property is respected
|
||||
// The paper mentioned above shows that the same property is respected
|
||||
// by every program that can detect any optimisation of that kind: either
|
||||
// it is racy (undefined) or there is a release followed by an acquire
|
||||
// between the pair of accesses under consideration.
|
||||
|
|
@ -685,13 +685,13 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
|
|||
return MemDepResult::getDef(Inst);
|
||||
if (isInvariantLoad)
|
||||
continue;
|
||||
// Be conservative if the accessed pointer may alias the allocation.
|
||||
if (AA->alias(Inst, AccessPtr) != NoAlias)
|
||||
return MemDepResult::getClobber(Inst);
|
||||
// If the allocation is not aliased and does not read memory (like
|
||||
// strdup), it is safe to ignore.
|
||||
if (isa<AllocaInst>(Inst) ||
|
||||
isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
|
||||
// Be conservative if the accessed pointer may alias the allocation -
|
||||
// fallback to the generic handling below.
|
||||
if ((AA->alias(Inst, AccessPtr) == NoAlias) &&
|
||||
// If the allocation is not aliased and does not read memory (like
|
||||
// strdup), it is safe to ignore.
|
||||
(isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) ||
|
||||
isCallocLikeFn(Inst, TLI)))
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -792,10 +792,8 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
|
|||
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
|
||||
int Count = -1) {
|
||||
if (Count == -1) Count = Cache.size();
|
||||
if (Count == 0) return;
|
||||
|
||||
for (unsigned i = 1; i != unsigned(Count); ++i)
|
||||
assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
|
||||
assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
|
||||
"Cache isn't sorted!");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -52,14 +52,13 @@ static bool hasSinCosPiStret(const Triple &T) {
|
|||
/// specified target triple. This should be carefully written so that a missing
|
||||
/// target triple gets a sane set of defaults.
|
||||
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
|
||||
const char *const *StandardNames) {
|
||||
#ifndef NDEBUG
|
||||
ArrayRef<const char *> StandardNames) {
|
||||
// Verify that the StandardNames array is in alphabetical order.
|
||||
for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
|
||||
if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
|
||||
llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
|
||||
}
|
||||
#endif // !NDEBUG
|
||||
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
|
||||
[](const char *LHS, const char *RHS) {
|
||||
return strcmp(LHS, RHS) < 0;
|
||||
}) &&
|
||||
"TargetLibraryInfoImpl function names must be sorted");
|
||||
|
||||
if (T.getArch() == Triple::r600 ||
|
||||
T.getArch() == Triple::amdgcn) {
|
||||
|
|
|
|||
|
|
@ -1743,9 +1743,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
|
|||
return false;
|
||||
|
||||
Value *X = nullptr, *Y = nullptr;
|
||||
// A shift of a power of two is a power of two or zero.
|
||||
// A shift left or a logical shift right of a power of two is a power of two
|
||||
// or zero.
|
||||
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
|
||||
match(V, m_Shr(m_Value(X), m_Value()))))
|
||||
match(V, m_LShr(m_Value(X), m_Value()))))
|
||||
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
|
||||
|
||||
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
|
||||
|
|
@ -2829,7 +2830,12 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
|
|||
const DataLayout &DL) {
|
||||
unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
|
||||
APInt ByteOffset(BitWidth, 0);
|
||||
while (1) {
|
||||
|
||||
// We walk up the defs but use a visited set to handle unreachable code. In
|
||||
// that case, we stop after accumulating the cycle once (not that it
|
||||
// matters).
|
||||
SmallPtrSet<Value *, 16> Visited;
|
||||
while (Visited.insert(Ptr).second) {
|
||||
if (Ptr->getType()->isVectorTy())
|
||||
break;
|
||||
|
||||
|
|
@ -3268,12 +3274,9 @@ static bool isDereferenceableAndAlignedPointer(
|
|||
}
|
||||
|
||||
// For gc.relocate, look through relocations
|
||||
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
|
||||
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
|
||||
GCRelocateOperands RelocateInst(I);
|
||||
return isDereferenceableAndAlignedPointer(
|
||||
RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
|
||||
}
|
||||
if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
|
||||
return isDereferenceableAndAlignedPointer(
|
||||
RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
|
||||
|
||||
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
|
||||
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
|
||||
|
|
@ -3474,10 +3477,6 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
|
|||
if (CS.isReturnNonNull())
|
||||
return true;
|
||||
|
||||
// operator new never returns null.
|
||||
if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3071,7 +3071,12 @@ void BitcodeReader::saveMetadataList(
|
|||
for (unsigned ID = 0; ID < MetadataList.size(); ++ID) {
|
||||
Metadata *MD = MetadataList[ID];
|
||||
auto *N = dyn_cast_or_null<MDNode>(MD);
|
||||
assert((!N || (N->isResolved() || N->isTemporary())) &&
|
||||
"Found non-resolved non-temp MDNode while saving metadata");
|
||||
// Save all values if !OnlyTempMD, otherwise just the temporary metadata.
|
||||
// Note that in the !OnlyTempMD case we need to save all Metadata, not
|
||||
// just MDNode, as we may have references to other types of module-level
|
||||
// metadata (e.g. ValueAsMetadata) from instructions.
|
||||
if (!OnlyTempMD || (N && N->isTemporary())) {
|
||||
// Will call this after materializing each function, in order to
|
||||
// handle remapping of the function's instructions/metadata.
|
||||
|
|
@ -3080,6 +3085,11 @@ void BitcodeReader::saveMetadataList(
|
|||
assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id");
|
||||
continue;
|
||||
}
|
||||
if (N && N->isTemporary())
|
||||
// Ensure that we assert if someone tries to RAUW this temporary
|
||||
// metadata while it is the key of a map. The flag will be set back
|
||||
// to true when the saved metadata list is destroyed.
|
||||
N->setCanReplace(false);
|
||||
MetadataToIDs[MD] = ID;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -976,32 +976,32 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
|
|||
}
|
||||
}
|
||||
|
||||
static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
|
||||
static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
|
||||
int Rank = 0;
|
||||
while (State != -1) {
|
||||
++Rank;
|
||||
State = FuncInfo.ClrEHUnwindMap[State].Parent;
|
||||
State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
|
||||
}
|
||||
return Rank;
|
||||
}
|
||||
|
||||
static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
|
||||
int LeftRank = getRank(FuncInfo, Left);
|
||||
int RightRank = getRank(FuncInfo, Right);
|
||||
static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
|
||||
int LeftRank = getTryRank(FuncInfo, Left);
|
||||
int RightRank = getTryRank(FuncInfo, Right);
|
||||
|
||||
while (LeftRank < RightRank) {
|
||||
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
|
||||
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
|
||||
--RightRank;
|
||||
}
|
||||
|
||||
while (RightRank < LeftRank) {
|
||||
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
|
||||
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
|
||||
--LeftRank;
|
||||
}
|
||||
|
||||
while (Left != Right) {
|
||||
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
|
||||
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
|
||||
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
|
||||
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
|
||||
}
|
||||
|
||||
return Left;
|
||||
|
|
@ -1035,9 +1035,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
|
|||
FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
|
||||
HandlerStates[HandlerBlock] = State;
|
||||
// Use this loop through all handlers to verify our assumption (used in
|
||||
// the MinEnclosingState computation) that ancestors have lower state
|
||||
// numbers than their descendants.
|
||||
assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
|
||||
// the MinEnclosingState computation) that enclosing funclets have lower
|
||||
// state numbers than their enclosed funclets.
|
||||
assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
|
||||
"ill-formed state numbering");
|
||||
}
|
||||
// Map the main function to the NullState.
|
||||
|
|
@ -1070,7 +1070,6 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
|
|||
SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
|
||||
|
||||
// Visit the root function and each funclet.
|
||||
|
||||
for (MachineFunction::const_iterator FuncletStart = MF->begin(),
|
||||
FuncletEnd = MF->begin(),
|
||||
End = MF->end();
|
||||
|
|
@ -1100,17 +1099,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
|
|||
for (const auto &StateChange :
|
||||
InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
|
||||
// Close any try regions we're not still under
|
||||
int AncestorState =
|
||||
getAncestor(FuncInfo, CurrentState, StateChange.NewState);
|
||||
while (CurrentState != AncestorState) {
|
||||
assert(CurrentState != NullState && "Failed to find ancestor!");
|
||||
int StillPendingState =
|
||||
getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
|
||||
while (CurrentState != StillPendingState) {
|
||||
assert(CurrentState != NullState &&
|
||||
"Failed to find still-pending state!");
|
||||
// Close the pending clause
|
||||
Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
|
||||
CurrentState, FuncletState});
|
||||
// Now the parent handler is current
|
||||
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
|
||||
// Now the next-outer try region is current
|
||||
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
|
||||
// Pop the new start label from the handler stack if we've exited all
|
||||
// descendants of the corresponding handler.
|
||||
// inner try regions of the corresponding try region.
|
||||
if (HandlerStack.back().second == CurrentState)
|
||||
CurrentStartLabel = HandlerStack.pop_back_val().first;
|
||||
}
|
||||
|
|
@ -1121,7 +1121,8 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
|
|||
// it.
|
||||
for (int EnteredState = StateChange.NewState;
|
||||
EnteredState != CurrentState;
|
||||
EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
|
||||
EnteredState =
|
||||
FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
|
||||
int &MinEnclosingState = MinClauseMap[EnteredState];
|
||||
if (FuncletState < MinEnclosingState)
|
||||
MinEnclosingState = FuncletState;
|
||||
|
|
|
|||
|
|
@ -225,8 +225,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
|
|||
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
|
||||
const DenseMap<unsigned int, unsigned int> &BypassWidths =
|
||||
TLI->getBypassSlowDivWidths();
|
||||
for (Function::iterator I = F.begin(); I != F.end(); I++)
|
||||
EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
|
||||
BasicBlock* BB = &*F.begin();
|
||||
while (BB != nullptr) {
|
||||
// bypassSlowDivision may create new BBs, but we don't want to reapply the
|
||||
// optimization to those blocks.
|
||||
BasicBlock* Next = BB->getNextNode();
|
||||
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
|
||||
BB = Next;
|
||||
}
|
||||
}
|
||||
|
||||
// Eliminate blocks that contain only PHI nodes and an
|
||||
|
|
@ -526,19 +532,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
|
|||
// Computes a map of base pointer relocation instructions to corresponding
|
||||
// derived pointer relocation instructions given a vector of all relocate calls
|
||||
static void computeBaseDerivedRelocateMap(
|
||||
const SmallVectorImpl<User *> &AllRelocateCalls,
|
||||
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
|
||||
RelocateInstMap) {
|
||||
const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
|
||||
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
|
||||
&RelocateInstMap) {
|
||||
// Collect information in two maps: one primarily for locating the base object
|
||||
// while filling the second map; the second map is the final structure holding
|
||||
// a mapping between Base and corresponding Derived relocate calls
|
||||
DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
|
||||
for (auto &U : AllRelocateCalls) {
|
||||
GCRelocateOperands ThisRelocate(U);
|
||||
IntrinsicInst *I = cast<IntrinsicInst>(U);
|
||||
auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
|
||||
ThisRelocate.getDerivedPtrIndex());
|
||||
RelocateIdxMap.insert(std::make_pair(K, I));
|
||||
DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
|
||||
for (auto *ThisRelocate : AllRelocateCalls) {
|
||||
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
|
||||
ThisRelocate->getDerivedPtrIndex());
|
||||
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
|
||||
}
|
||||
for (auto &Item : RelocateIdxMap) {
|
||||
std::pair<unsigned, unsigned> Key = Item.first;
|
||||
|
|
@ -546,7 +550,7 @@ static void computeBaseDerivedRelocateMap(
|
|||
// Base relocation: nothing to insert
|
||||
continue;
|
||||
|
||||
IntrinsicInst *I = Item.second;
|
||||
GCRelocateInst *I = Item.second;
|
||||
auto BaseKey = std::make_pair(Key.first, Key.first);
|
||||
|
||||
// We're iterating over RelocateIdxMap so we cannot modify it.
|
||||
|
|
@ -579,16 +583,13 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
|
|||
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
|
||||
// replace, computes a replacement, and applies it.
|
||||
static bool
|
||||
simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
|
||||
const SmallVectorImpl<IntrinsicInst *> &Targets) {
|
||||
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
|
||||
const SmallVectorImpl<GCRelocateInst *> &Targets) {
|
||||
bool MadeChange = false;
|
||||
for (auto &ToReplace : Targets) {
|
||||
GCRelocateOperands MasterRelocate(RelocatedBase);
|
||||
GCRelocateOperands ThisRelocate(ToReplace);
|
||||
|
||||
assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
|
||||
for (GCRelocateInst *ToReplace : Targets) {
|
||||
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
|
||||
"Not relocating a derived object of the original base object");
|
||||
if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
|
||||
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
|
||||
// A duplicate relocate call. TODO: coalesce duplicates.
|
||||
continue;
|
||||
}
|
||||
|
|
@ -601,8 +602,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
|
|||
continue;
|
||||
}
|
||||
|
||||
Value *Base = ThisRelocate.getBasePtr();
|
||||
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
|
||||
Value *Base = ToReplace->getBasePtr();
|
||||
auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
|
||||
if (!Derived || Derived->getPointerOperand() != Base)
|
||||
continue;
|
||||
|
||||
|
|
@ -680,12 +681,12 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
|
|||
// %val = load %ptr'
|
||||
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
|
||||
bool MadeChange = false;
|
||||
SmallVector<User *, 2> AllRelocateCalls;
|
||||
SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
|
||||
|
||||
for (auto *U : I.users())
|
||||
if (isGCRelocate(dyn_cast<Instruction>(U)))
|
||||
if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
|
||||
// Collect all the relocate calls associated with a statepoint
|
||||
AllRelocateCalls.push_back(U);
|
||||
AllRelocateCalls.push_back(Relocate);
|
||||
|
||||
// We need at least one base pointer relocation + one derived pointer
|
||||
// relocation to mangle
|
||||
|
|
@ -694,7 +695,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
|
|||
|
||||
// RelocateInstMap is a mapping from the base relocate instruction to the
|
||||
// corresponding derived relocate instructions
|
||||
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
|
||||
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
|
||||
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
|
||||
if (RelocateInstMap.empty())
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -122,8 +122,7 @@ INITIALIZE_PASS_END(MachineCSE, "machine-cse",
|
|||
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
|
||||
MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
for (MachineOperand &MO : MI->operands()) {
|
||||
if (!MO.isReg() || !MO.isUse())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
|
|
@ -186,8 +185,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
|
|||
return true;
|
||||
|
||||
bool SeenDef = false;
|
||||
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = I->getOperand(i);
|
||||
for (const MachineOperand &MO : I->operands()) {
|
||||
if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
|
||||
SeenDef = true;
|
||||
if (!MO.isReg() || !MO.getReg())
|
||||
|
|
@ -220,8 +218,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
|
|||
SmallVectorImpl<unsigned> &PhysDefs,
|
||||
bool &PhysUseDef) const{
|
||||
// First, add all uses to PhysRefs.
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
for (const MachineOperand &MO : MI->operands()) {
|
||||
if (!MO.isReg() || MO.isDef())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
|
|
@ -239,8 +236,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
|
|||
// (which currently contains only uses), set the PhysUseDef flag.
|
||||
PhysUseDef = false;
|
||||
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
for (const MachineOperand &MO : MI->operands()) {
|
||||
if (!MO.isReg() || !MO.isDef())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
|
|
@ -311,8 +307,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
|
|||
if (I == E)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = I->getOperand(i);
|
||||
for (const MachineOperand &MO : I->operands()) {
|
||||
// RegMasks go on instructions like calls that clobber lots of physregs.
|
||||
// Don't attempt to CSE across such an instruction.
|
||||
if (MO.isRegMask())
|
||||
|
|
@ -398,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
|
|||
// Heuristics #2: If the expression doesn't use a vr and the only uses
|
||||
// of the redundant computation are copies, do not cse.
|
||||
bool HasVRegUse = false;
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
for (const MachineOperand &MO : MI->operands()) {
|
||||
if (MO.isReg() && MO.isUse() &&
|
||||
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
|
||||
HasVRegUse = true;
|
||||
|
|
@ -580,9 +574,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
|
||||
// Actually perform the elimination.
|
||||
if (DoCSE) {
|
||||
for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
|
||||
unsigned OldReg = CSEPairs[i].first;
|
||||
unsigned NewReg = CSEPairs[i].second;
|
||||
for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
|
||||
unsigned OldReg = CSEPair.first;
|
||||
unsigned NewReg = CSEPair.second;
|
||||
// OldReg may have been unused but is used now, clear the Dead flag
|
||||
MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
|
||||
assert(Def != nullptr && "CSEd register has no unique definition?");
|
||||
|
|
@ -594,8 +588,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
|
|||
|
||||
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
|
||||
// we should make sure it is not dead at CSMI.
|
||||
for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
|
||||
CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
|
||||
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
|
||||
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
|
||||
|
||||
// Go through implicit defs of CSMI and MI, and clear the kill flags on
|
||||
// their uses in all the instructions between CSMI and MI.
|
||||
|
|
@ -685,18 +679,14 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
|
|||
Node = WorkList.pop_back_val();
|
||||
Scopes.push_back(Node);
|
||||
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
|
||||
unsigned NumChildren = Children.size();
|
||||
OpenChildren[Node] = NumChildren;
|
||||
for (unsigned i = 0; i != NumChildren; ++i) {
|
||||
MachineDomTreeNode *Child = Children[i];
|
||||
OpenChildren[Node] = Children.size();
|
||||
for (MachineDomTreeNode *Child : Children)
|
||||
WorkList.push_back(Child);
|
||||
}
|
||||
} while (!WorkList.empty());
|
||||
|
||||
// Now perform CSE.
|
||||
bool Changed = false;
|
||||
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
|
||||
MachineDomTreeNode *Node = Scopes[i];
|
||||
for (MachineDomTreeNode *Node : Scopes) {
|
||||
MachineBasicBlock *MBB = Node->getBlock();
|
||||
EnterScope(MBB);
|
||||
Changed |= ProcessBlock(MBB);
|
||||
|
|
|
|||
|
|
@ -866,6 +866,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
|
|||
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
|
||||
}
|
||||
|
||||
std::pair<MachineInstr::mmo_iterator, unsigned>
|
||||
MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
|
||||
// TODO: If we end up with too many memory operands, return the empty
|
||||
// conservative set rather than failing asserts.
|
||||
// TODO: consider uniquing elements within the operand lists to reduce
|
||||
// space usage and fall back to conservative information less often.
|
||||
size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
|
||||
+ (Other.memoperands_end() - Other.memoperands_begin());
|
||||
|
||||
MachineFunction *MF = getParent()->getParent();
|
||||
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
|
||||
mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
|
||||
MemBegin);
|
||||
MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
|
||||
MemEnd);
|
||||
assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
|
||||
"missing memrefs");
|
||||
|
||||
return std::make_pair(MemBegin, CombinedNumMemRefs);
|
||||
}
|
||||
|
||||
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
|
||||
assert(!isBundledWithPred() && "Must be called on bundle header");
|
||||
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
|
||||
|
|
@ -1738,7 +1759,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
|
|||
bool HaveSemi = false;
|
||||
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
|
||||
if (Flags & PrintableFlags) {
|
||||
if (!HaveSemi) OS << ";"; HaveSemi = true;
|
||||
if (!HaveSemi) {
|
||||
OS << ";";
|
||||
HaveSemi = true;
|
||||
}
|
||||
OS << " flags: ";
|
||||
|
||||
if (Flags & FrameSetup)
|
||||
|
|
@ -1749,7 +1773,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
|
|||
}
|
||||
|
||||
if (!memoperands_empty()) {
|
||||
if (!HaveSemi) OS << ";"; HaveSemi = true;
|
||||
if (!HaveSemi) {
|
||||
OS << ";";
|
||||
HaveSemi = true;
|
||||
}
|
||||
|
||||
OS << " mem:";
|
||||
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
|
||||
|
|
@ -1762,7 +1789,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
|
|||
|
||||
// Print the regclass of any virtual registers encountered.
|
||||
if (MRI && !VirtRegs.empty()) {
|
||||
if (!HaveSemi) OS << ";"; HaveSemi = true;
|
||||
if (!HaveSemi) {
|
||||
OS << ";";
|
||||
HaveSemi = true;
|
||||
}
|
||||
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
|
||||
OS << " " << TRI->getRegClassName(RC)
|
||||
|
|
@ -1781,7 +1811,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
|
|||
|
||||
// Print debug location information.
|
||||
if (isDebugValue() && getOperand(e - 2).isMetadata()) {
|
||||
if (!HaveSemi) OS << ";";
|
||||
if (!HaveSemi)
|
||||
OS << ";";
|
||||
auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
|
||||
OS << " line no:" << DV->getLine();
|
||||
if (auto *InlinedAt = debugLoc->getInlinedAt()) {
|
||||
|
|
@ -1795,7 +1826,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
|
|||
if (isIndirectDebugValue())
|
||||
OS << " indirect";
|
||||
} else if (debugLoc && MF) {
|
||||
if (!HaveSemi) OS << ";";
|
||||
if (!HaveSemi)
|
||||
OS << ";";
|
||||
OS << " dbg:";
|
||||
debugLoc.print(OS);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -315,7 +315,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
|
|||
if (!TRI->regsOverlap(MOReg, Reg))
|
||||
continue;
|
||||
|
||||
bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
|
||||
bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
|
||||
if (MO.readsReg()) {
|
||||
PRI.Read = true;
|
||||
if (Covered) {
|
||||
|
|
|
|||
|
|
@ -97,9 +97,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
|
|||
unsigned Weight = PSetI.getWeight();
|
||||
for (; PSetI.isValid(); ++PSetI) {
|
||||
CurrSetPressure[*PSetI] += Weight;
|
||||
if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
|
||||
P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
|
||||
}
|
||||
P.MaxSetPressure[*PSetI] =
|
||||
std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
|
|||
uint64_t PtrOff = ShAmt / 8;
|
||||
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
|
||||
SDLoc DL(LN0);
|
||||
// The original load itself didn't wrap, so an offset within it doesn't.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
|
||||
PtrType, LN0->getBasePtr(),
|
||||
DAG.getConstant(PtrOff, DL, PtrType));
|
||||
DAG.getConstant(PtrOff, DL, PtrType),
|
||||
&Flags);
|
||||
AddToWorklist(NewPtr.getNode());
|
||||
|
||||
SDValue Load;
|
||||
|
|
|
|||
|
|
@ -2843,6 +2843,43 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
|
|||
return (AZero | BZero).isAllOnesValue();
|
||||
}
|
||||
|
||||
/// Try to fold a CONCAT_VECTORS of \p Ops with result type \p VT.
///
/// Handles three cases: a single operand is returned as is, a concat whose
/// operands are all undef becomes UNDEF of \p VT, and a concat whose
/// operands are all BUILD_VECTORs becomes one BUILD_VECTOR of \p VT.
/// \returns the folded value, or an empty SDValue when no fold applies.
static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
                                  llvm::SelectionDAG &DAG) {
  // Nothing to concatenate when there is only one piece.
  if (Ops.size() == 1)
    return Ops[0];

  // Concat of UNDEFs is UNDEF.
  auto IsNotUndef = [](SDValue Operand) { return !Operand.isUndef(); };
  if (std::none_of(Ops.begin(), Ops.end(), IsNotUndef))
    return DAG.getUNDEF(VT);

  // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified
  // to one big BUILD_VECTOR; give up as soon as one operand is anything else.
  // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well.
  auto IsNotBuildVector = [](SDValue Operand) {
    return Operand.getOpcode() != ISD::BUILD_VECTOR;
  };
  if (std::any_of(Ops.begin(), Ops.end(), IsNotBuildVector))
    return SDValue();

  // Gather the scalar operands of every BUILD_VECTOR, in order.
  SmallVector<SDValue, 16> Scalars;
  for (SDValue Operand : Ops)
    Scalars.append(Operand->op_begin(), Operand->op_end());

  // BUILD_VECTOR requires all inputs to be of the same type, so find the
  // widest type among the result's scalar type and the gathered scalars.
  EVT WidestVT = VT.getScalarType();
  for (SDValue Scalar : Scalars) {
    if (WidestVT.bitsLT(Scalar.getValueType()))
      WidestVT = Scalar.getValueType();
  }

  // If some scalar is wider than the result's element type, bring every
  // scalar to that widest type, zero-extending where the target reports that
  // as free and sign-extending otherwise.
  if (WidestVT.bitsGT(VT.getScalarType())) {
    for (SDValue &Scalar : Scalars) {
      if (DAG.getTargetLoweringInfo().isZExtFree(Scalar.getValueType(),
                                                 WidestVT))
        Scalar = DAG.getZExtOrTrunc(Scalar, DL, WidestVT);
      else
        Scalar = DAG.getSExtOrTrunc(Scalar, DL, WidestVT);
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Scalars);
}
|
||||
|
||||
/// getNode - Gets or creates the specified node.
|
||||
///
|
||||
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
|
||||
|
|
@ -3426,34 +3463,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
|
|||
if (N2.getOpcode() == ISD::EntryToken) return N1;
|
||||
if (N1 == N2) return N1;
|
||||
break;
|
||||
case ISD::CONCAT_VECTORS:
|
||||
// Concat of UNDEFs is UNDEF.
|
||||
if (N1.getOpcode() == ISD::UNDEF &&
|
||||
N2.getOpcode() == ISD::UNDEF)
|
||||
return getUNDEF(VT);
|
||||
|
||||
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
|
||||
// one big BUILD_VECTOR.
|
||||
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N2.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
|
||||
N1.getNode()->op_end());
|
||||
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
|
||||
|
||||
// BUILD_VECTOR requires all inputs to be of the same type, find the
|
||||
// maximum type and extend them all.
|
||||
EVT SVT = VT.getScalarType();
|
||||
for (SDValue Op : Elts)
|
||||
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
|
||||
if (SVT.bitsGT(VT.getScalarType()))
|
||||
for (SDValue &Op : Elts)
|
||||
Op = TLI->isZExtFree(Op.getValueType(), SVT)
|
||||
? getZExtOrTrunc(Op, DL, SVT)
|
||||
: getSExtOrTrunc(Op, DL, SVT);
|
||||
|
||||
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
|
||||
}
|
||||
case ISD::CONCAT_VECTORS: {
|
||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||
SDValue Ops[] = {N1, N2};
|
||||
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
|
||||
return V;
|
||||
break;
|
||||
}
|
||||
case ISD::AND:
|
||||
assert(VT.isInteger() && "This operator does not apply to FP types!");
|
||||
assert(N1.getValueType() == N2.getValueType() &&
|
||||
|
|
@ -3911,19 +3927,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case ISD::CONCAT_VECTORS:
|
||||
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
|
||||
// one big BUILD_VECTOR.
|
||||
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N2.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N3.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
|
||||
N1.getNode()->op_end());
|
||||
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
|
||||
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
|
||||
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
|
||||
}
|
||||
case ISD::CONCAT_VECTORS: {
|
||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||
SDValue Ops[] = {N1, N2, N3};
|
||||
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
|
||||
return V;
|
||||
break;
|
||||
}
|
||||
case ISD::SETCC: {
|
||||
// Use FoldSetCC to simplify SETCC's.
|
||||
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
|
||||
|
|
@ -5462,6 +5472,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
|
|||
|
||||
switch (Opcode) {
|
||||
default: break;
|
||||
case ISD::CONCAT_VECTORS: {
|
||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
|
||||
return V;
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT_CC: {
|
||||
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
|
||||
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
|
||||
|
|
|
|||
|
|
@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
|
|||
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
|
||||
unsigned NumValues = ValueVTs.size();
|
||||
|
||||
// An aggregate return value cannot wrap around the address space, so
|
||||
// offsets to its parts don't wrap either.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
SmallVector<SDValue, 4> Chains(NumValues);
|
||||
for (unsigned i = 0; i != NumValues; ++i) {
|
||||
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
|
||||
RetPtr.getValueType(), RetPtr,
|
||||
DAG.getIntPtrConstant(Offsets[i],
|
||||
getCurSDLoc()));
|
||||
getCurSDLoc()),
|
||||
&Flags);
|
||||
Chains[i] =
|
||||
DAG.getStore(Chain, getCurSDLoc(),
|
||||
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
|
||||
|
|
@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
|
|||
if (Field) {
|
||||
// N = N + Offset
|
||||
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
|
||||
|
||||
// In an inbounds GEP with an offset that is nonnegative even when
|
||||
// interpreted as signed, assume there is no unsigned overflow.
|
||||
SDNodeFlags Flags;
|
||||
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
|
||||
DAG.getConstant(Offset, dl, N.getValueType()));
|
||||
DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
|
||||
}
|
||||
|
||||
Ty = StTy->getElementType(Field);
|
||||
|
|
@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
|
|||
SDValue OffsVal = VectorWidth ?
|
||||
DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
|
||||
DAG.getConstant(Offs, dl, PtrTy);
|
||||
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
|
||||
|
||||
// In an inbounds GEP with an offset that is nonnegative even when
|
||||
// interpreted as signed, assume there is no unsigned overflow.
|
||||
SDNodeFlags Flags;
|
||||
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
|
|||
Align = 0;
|
||||
|
||||
// Round the size of the allocation up to the stack alignment size
|
||||
// by add SA-1 to the size.
|
||||
// by adding SA-1 to the size. This doesn't overflow because we're computing
|
||||
// an address inside an alloca.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
AllocSize = DAG.getNode(ISD::ADD, dl,
|
||||
AllocSize.getValueType(), AllocSize,
|
||||
DAG.getIntPtrConstant(StackAlign - 1, dl));
|
||||
DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
|
||||
|
||||
// Mask out the low bits for alignment purposes.
|
||||
AllocSize = DAG.getNode(ISD::AND, dl,
|
||||
|
|
@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
|
|||
if (isVolatile)
|
||||
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
|
||||
|
||||
// An aggregate load cannot wrap around the address space, so offsets to its
|
||||
// parts don't wrap either.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
SmallVector<SDValue, 4> Values(NumValues);
|
||||
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
|
||||
EVT PtrVT = Ptr.getValueType();
|
||||
|
|
@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
|
|||
}
|
||||
SDValue A = DAG.getNode(ISD::ADD, dl,
|
||||
PtrVT, Ptr,
|
||||
DAG.getConstant(Offsets[i], dl, PtrVT));
|
||||
DAG.getConstant(Offsets[i], dl, PtrVT),
|
||||
&Flags);
|
||||
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
|
||||
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
|
||||
isNonTemporal, isInvariant, Alignment, AAInfo,
|
||||
|
|
@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
|
|||
AAMDNodes AAInfo;
|
||||
I.getAAMetadata(AAInfo);
|
||||
|
||||
// An aggregate store cannot wrap around the address space, so offsets to its
|
||||
// parts don't wrap either.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
unsigned ChainI = 0;
|
||||
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
|
||||
// See visitLoad comments.
|
||||
|
|
@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
|
|||
ChainI = 0;
|
||||
}
|
||||
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
|
||||
DAG.getConstant(Offsets[i], dl, PtrVT));
|
||||
DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
|
||||
SDValue St = DAG.getStore(Root, dl,
|
||||
SDValue(Src.getNode(), Src.getResNo() + i),
|
||||
Add, MachinePointerInfo(PtrV, Offsets[i]),
|
||||
|
|
@ -5189,7 +5223,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
|||
return nullptr;
|
||||
}
|
||||
case Intrinsic::experimental_gc_relocate: {
|
||||
visitGCRelocate(I);
|
||||
visitGCRelocate(cast<GCRelocateInst>(I));
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::instrprof_increment:
|
||||
|
|
@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||
ReturnValues.resize(NumValues);
|
||||
SmallVector<SDValue, 4> Chains(NumValues);
|
||||
|
||||
// An aggregate return value cannot wrap around the address space, so
|
||||
// offsets to its parts don't wrap either.
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
for (unsigned i = 0; i < NumValues; ++i) {
|
||||
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
|
||||
CLI.DAG.getConstant(Offsets[i], CLI.DL,
|
||||
PtrVT));
|
||||
PtrVT), &Flags);
|
||||
SDValue L = CLI.DAG.getLoad(
|
||||
RetTys[i], CLI.DL, CLI.Chain, Add,
|
||||
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
|
||||
|
|
|
|||
|
|
@ -855,7 +855,7 @@ private:
|
|||
|
||||
// These three are implemented in StatepointLowering.cpp
|
||||
void visitStatepoint(const CallInst &I);
|
||||
void visitGCRelocate(const CallInst &I);
|
||||
void visitGCRelocate(const GCRelocateInst &I);
|
||||
void visitGCResult(const CallInst &I);
|
||||
|
||||
void visitUserOp1(const Instruction &I) {
|
||||
|
|
|
|||
|
|
@ -633,6 +633,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
|
|||
MRI.replaceRegWith(From, To);
|
||||
}
|
||||
|
||||
if (TLI->hasCopyImplyingStackAdjustment(MF))
|
||||
MFI->setHasOpaqueSPAdjustment(true);
|
||||
|
||||
// Freeze the set of reserved registers now that MachineFrameInfo has been
|
||||
// set up. All the information required by getReservedRegs() should be
|
||||
// available now.
|
||||
|
|
|
|||
|
|
@ -128,13 +128,11 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
|
|||
return Optional<int>();
|
||||
|
||||
// Spill location is known for gc relocates
|
||||
if (isGCRelocate(Val)) {
|
||||
GCRelocateOperands RelocOps(cast<Instruction>(Val));
|
||||
|
||||
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
|
||||
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
|
||||
Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
|
||||
Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()];
|
||||
|
||||
auto It = SpillMap.find(RelocOps.getDerivedPtr());
|
||||
auto It = SpillMap.find(Relocate->getDerivedPtr());
|
||||
if (It == SpillMap.end())
|
||||
return Optional<int>();
|
||||
|
||||
|
|
@ -401,10 +399,10 @@ static void getIncomingStatepointGCValues(
|
|||
SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
|
||||
SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
|
||||
SelectionDAGBuilder &Builder) {
|
||||
for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) {
|
||||
Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
|
||||
Bases.push_back(relocateOpers.getBasePtr());
|
||||
Ptrs.push_back(relocateOpers.getDerivedPtr());
|
||||
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
|
||||
Relocs.push_back(Relocate);
|
||||
Bases.push_back(Relocate->getBasePtr());
|
||||
Ptrs.push_back(Relocate->getDerivedPtr());
|
||||
}
|
||||
|
||||
// Remove any redundant llvm::Values which map to the same SDValue as another
|
||||
|
|
@ -602,8 +600,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
|
|||
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
|
||||
Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
|
||||
|
||||
for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) {
|
||||
const Value *V = RelocateOpers.getDerivedPtr();
|
||||
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
|
||||
const Value *V = Relocate->getDerivedPtr();
|
||||
SDValue SDV = Builder.getValue(V);
|
||||
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
|
||||
|
||||
|
|
@ -624,8 +622,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
|
|||
// uses of the corresponding values so that it would automatically
|
||||
// export them. Relocates of the spilled values do not use the original
|
||||
// value.
|
||||
if (RelocateOpers.getUnderlyingCallSite().getParent() !=
|
||||
StatepointInstr->getParent())
|
||||
if (Relocate->getParent() != StatepointInstr->getParent())
|
||||
Builder.ExportFromCurrentBlock(V);
|
||||
}
|
||||
}
|
||||
|
|
@ -656,7 +653,7 @@ void SelectionDAGBuilder::LowerStatepoint(
|
|||
// statepoint.
|
||||
for (const User *U : CS->users()) {
|
||||
const CallInst *Call = cast<CallInst>(U);
|
||||
if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
|
||||
if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent())
|
||||
StatepointLowering.scheduleRelocCall(*Call);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -859,24 +856,22 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
|
|||
}
|
||||
}
|
||||
|
||||
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
|
||||
GCRelocateOperands RelocateOpers(&CI);
|
||||
|
||||
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
|
||||
#ifndef NDEBUG
|
||||
// Consistency check
|
||||
// We skip this check for relocates not in the same basic block as their
|
||||
// statepoint. It would be too expensive to preserve validation info through
|
||||
// different basic blocks.
|
||||
if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
|
||||
StatepointLowering.relocCallVisited(CI);
|
||||
if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) {
|
||||
StatepointLowering.relocCallVisited(Relocate);
|
||||
}
|
||||
#endif
|
||||
|
||||
const Value *DerivedPtr = RelocateOpers.getDerivedPtr();
|
||||
const Value *DerivedPtr = Relocate.getDerivedPtr();
|
||||
SDValue SD = getValue(DerivedPtr);
|
||||
|
||||
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
|
||||
FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()];
|
||||
FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()];
|
||||
|
||||
// We should have recorded location for this pointer
|
||||
assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
|
||||
|
|
@ -885,7 +880,7 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
|
|||
// We didn't need to spill these special cases (constants and allocas).
|
||||
// See the handling in spillIncomingValueForStatepoint for detail.
|
||||
if (!DerivedPtrLocation) {
|
||||
setValue(&CI, SD);
|
||||
setValue(&Relocate, SD);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -907,5 +902,5 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
|
|||
DAG.setRoot(SpillLoad.getValue(1));
|
||||
|
||||
assert(SpillLoad.getNode());
|
||||
setValue(&CI, SpillLoad);
|
||||
setValue(&Relocate, SpillLoad);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ unsigned TargetSchedModel::computeOperandLatency(
|
|||
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
|
||||
&& SchedModel.isComplete()) {
|
||||
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
|
||||
<< *DefMI;
|
||||
<< *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
|
||||
llvm_unreachable("incomplete machine model");
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -17,11 +17,14 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/WinEHFuncInfo.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
|
@ -435,11 +438,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
|
|||
calculateStateNumbersForInvokes(Fn, FuncInfo);
|
||||
}
|
||||
|
||||
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
|
||||
ClrHandlerType HandlerType, uint32_t TypeToken,
|
||||
const BasicBlock *Handler) {
|
||||
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
|
||||
int TryParentState, ClrHandlerType HandlerType,
|
||||
uint32_t TypeToken, const BasicBlock *Handler) {
|
||||
ClrEHUnwindMapEntry Entry;
|
||||
Entry.Parent = ParentState;
|
||||
Entry.HandlerParentState = HandlerParentState;
|
||||
Entry.TryParentState = TryParentState;
|
||||
Entry.Handler = Handler;
|
||||
Entry.HandlerType = HandlerType;
|
||||
Entry.TypeToken = TypeToken;
|
||||
|
|
@ -453,82 +457,199 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
|
|||
if (!FuncInfo.EHPadStateMap.empty())
|
||||
return;
|
||||
|
||||
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
|
||||
// This numbering assigns one state number to each catchpad and cleanuppad.
|
||||
// It also computes two tree-like relations over states:
|
||||
// 1) Each state has a "HandlerParentState", which is the state of the next
|
||||
// outer handler enclosing this state's handler (same as nearest ancestor
|
||||
// per the ParentPad linkage on EH pads, but skipping over catchswitches).
|
||||
// 2) Each state has a "TryParentState", which:
|
||||
// a) for a catchpad that's not the last handler on its catchswitch, is
|
||||
// the state of the next catchpad on that catchswitch
|
||||
// b) for all other pads, is the state of the pad whose try region is the
|
||||
// next outer try region enclosing this state's try region. The "try"
|
||||
// regions are not present as such in the IR, but will be inferred
|
||||
// based on the placement of invokes and pads which reach each other
|
||||
// by exceptional exits
|
||||
// Catchswitches do not get their own states, but each gets mapped to the
|
||||
// state of its first catchpad.
|
||||
|
||||
// Each pad needs to be able to refer to its parent, so scan the function
|
||||
// looking for top-level handlers and seed the worklist with them.
|
||||
// Step one: walk down from outermost to innermost funclets, assigning each
|
||||
// catchpad and cleanuppad a state number. Add an entry to the
|
||||
// ClrEHUnwindMap for each state, recording its HandlerParentState and
|
||||
// handler attributes. Record the TryParentState as well for each catchpad
|
||||
// that's not the last on its catchswitch, but initialize all other entries'
|
||||
// TryParentStates to a sentinel -1 value that the next pass will update.
|
||||
|
||||
// Seed a worklist with pads that have no parent.
|
||||
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
|
||||
for (const BasicBlock &BB : *Fn) {
|
||||
if (!BB.isEHPad())
|
||||
continue;
|
||||
if (BB.isLandingPad())
|
||||
report_fatal_error("CoreCLR EH cannot use landingpads");
|
||||
const Instruction *FirstNonPHI = BB.getFirstNonPHI();
|
||||
if (!isTopLevelPadForMSVC(FirstNonPHI))
|
||||
const Value *ParentPad;
|
||||
if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
|
||||
ParentPad = CPI->getParentPad();
|
||||
else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
|
||||
ParentPad = CSI->getParentPad();
|
||||
else
|
||||
continue;
|
||||
// queue this with sentinel parent state -1 to mean unwind to caller.
|
||||
Worklist.emplace_back(FirstNonPHI, -1);
|
||||
if (isa<ConstantTokenNone>(ParentPad))
|
||||
Worklist.emplace_back(FirstNonPHI, -1);
|
||||
}
|
||||
|
||||
// Use the worklist to visit all pads, from outer to inner. Record
|
||||
// HandlerParentState for all pads. Record TryParentState only for catchpads
|
||||
// that aren't the last on their catchswitch (setting all other entries'
|
||||
// TryParentStates to an initial value of -1). This loop is also responsible
|
||||
// for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
|
||||
// catchswitches.
|
||||
while (!Worklist.empty()) {
|
||||
const Instruction *Pad;
|
||||
int ParentState;
|
||||
std::tie(Pad, ParentState) = Worklist.pop_back_val();
|
||||
int HandlerParentState;
|
||||
std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
|
||||
|
||||
Value *ParentPad;
|
||||
int PredState;
|
||||
if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
|
||||
// A cleanup can have multiple exits; don't re-process after the first.
|
||||
if (FuncInfo.EHPadStateMap.count(Cleanup))
|
||||
continue;
|
||||
// CoreCLR personality uses arity to distinguish faults from finallies.
|
||||
const BasicBlock *PadBlock = Cleanup->getParent();
|
||||
if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
|
||||
// Create the entry for this cleanup with the appropriate handler
|
||||
// properties. Finally and fault handlers are distinguished by arity.
|
||||
ClrHandlerType HandlerType =
|
||||
(Cleanup->getNumOperands() ? ClrHandlerType::Fault
|
||||
: ClrHandlerType::Finally);
|
||||
int NewState =
|
||||
addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
|
||||
FuncInfo.EHPadStateMap[Cleanup] = NewState;
|
||||
// Propagate the new state to all preds of the cleanup
|
||||
ParentPad = Cleanup->getParentPad();
|
||||
PredState = NewState;
|
||||
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
|
||||
SmallVector<const CatchPadInst *, 1> Handlers;
|
||||
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
|
||||
const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
|
||||
Handlers.push_back(Catch);
|
||||
}
|
||||
FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
|
||||
int NewState = ParentState;
|
||||
for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
|
||||
HandlerI != HandlerE; ++HandlerI) {
|
||||
const CatchPadInst *Catch = *HandlerI;
|
||||
const BasicBlock *PadBlock = Catch->getParent();
|
||||
(Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
|
||||
: ClrHandlerType::Finally);
|
||||
int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
|
||||
HandlerType, 0, Pad->getParent());
|
||||
// Queue any child EH pads on the worklist.
|
||||
for (const User *U : Cleanup->users())
|
||||
if (const auto *I = dyn_cast<Instruction>(U))
|
||||
if (I->isEHPad())
|
||||
Worklist.emplace_back(I, CleanupState);
|
||||
// Remember this pad's state.
|
||||
FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
|
||||
} else {
|
||||
// Walk the handlers of this catchswitch in reverse order since all but
|
||||
// the last need to set the following one as its TryParentState.
|
||||
const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
|
||||
int CatchState = -1, FollowerState = -1;
|
||||
SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
|
||||
for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
|
||||
CBI != CBE; ++CBI, FollowerState = CatchState) {
|
||||
const BasicBlock *CatchBlock = *CBI;
|
||||
// Create the entry for this catch with the appropriate handler
|
||||
// properties.
|
||||
const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
|
||||
uint32_t TypeToken = static_cast<uint32_t>(
|
||||
cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
|
||||
NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
|
||||
TypeToken, PadBlock);
|
||||
FuncInfo.EHPadStateMap[Catch] = NewState;
|
||||
CatchState =
|
||||
addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
|
||||
ClrHandlerType::Catch, TypeToken, CatchBlock);
|
||||
// Queue any child EH pads on the worklist.
|
||||
for (const User *U : Catch->users())
|
||||
if (const auto *I = dyn_cast<Instruction>(U))
|
||||
if (I->isEHPad())
|
||||
Worklist.emplace_back(I, CatchState);
|
||||
// Remember this catch's state.
|
||||
FuncInfo.EHPadStateMap[Catch] = CatchState;
|
||||
}
|
||||
for (const auto *CatchPad : Handlers) {
|
||||
for (const User *U : CatchPad->users()) {
|
||||
const auto *UserI = cast<Instruction>(U);
|
||||
if (UserI->isEHPad())
|
||||
Worklist.emplace_back(UserI, ParentState);
|
||||
}
|
||||
}
|
||||
PredState = NewState;
|
||||
ParentPad = CatchSwitch->getParentPad();
|
||||
} else {
|
||||
llvm_unreachable("Unexpected EH pad");
|
||||
}
|
||||
|
||||
// Queue all predecessors with the given state
|
||||
for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
|
||||
if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
|
||||
Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
|
||||
// Associate the catchswitch with the state of its first catch.
|
||||
assert(CatchSwitch->getNumHandlers());
|
||||
FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
|
||||
}
|
||||
}
|
||||
|
||||
// Step two: record the TryParentState of each state. For cleanuppads that
|
||||
// don't have cleanuprets, we may need to infer this from their child pads,
|
||||
// so visit pads in descendant-most to ancestor-most order.
|
||||
for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
|
||||
End = FuncInfo.ClrEHUnwindMap.rend();
|
||||
Entry != End; ++Entry) {
|
||||
const Instruction *Pad =
|
||||
Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
|
||||
// For most pads, the TryParentState is the state associated with the
|
||||
// unwind dest of exceptional exits from it.
|
||||
const BasicBlock *UnwindDest;
|
||||
if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
|
||||
// If a catch is not the last in its catchswitch, its TryParentState is
|
||||
// the state associated with the next catch in the switch, even though
|
||||
// that's not the unwind dest of exceptions escaping the catch. Those
|
||||
// cases were already assigned a TryParentState in the first pass, so
|
||||
// skip them.
|
||||
if (Entry->TryParentState != -1)
|
||||
continue;
|
||||
// Otherwise, get the unwind dest from the catchswitch.
|
||||
UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
|
||||
} else {
|
||||
const auto *Cleanup = cast<CleanupPadInst>(Pad);
|
||||
UnwindDest = nullptr;
|
||||
for (const User *U : Cleanup->users()) {
|
||||
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
|
||||
// Common and unambiguous case -- cleanupret indicates cleanup's
|
||||
// unwind dest.
|
||||
UnwindDest = CleanupRet->getUnwindDest();
|
||||
break;
|
||||
}
|
||||
|
||||
// Get an unwind dest for the user
|
||||
const BasicBlock *UserUnwindDest = nullptr;
|
||||
if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
|
||||
UserUnwindDest = Invoke->getUnwindDest();
|
||||
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
|
||||
UserUnwindDest = CatchSwitch->getUnwindDest();
|
||||
} else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
|
||||
int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
|
||||
int UserUnwindState =
|
||||
FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
|
||||
if (UserUnwindState != -1)
|
||||
UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
|
||||
.Handler.get<const BasicBlock *>();
|
||||
}
|
||||
|
||||
// Not having an unwind dest for this user might indicate that it
|
||||
// doesn't unwind, so can't be taken as proof that the cleanup itself
|
||||
// may unwind to caller (see e.g. SimplifyUnreachable and
|
||||
// RemoveUnwindEdge).
|
||||
if (!UserUnwindDest)
|
||||
continue;
|
||||
|
||||
// Now we have an unwind dest for the user, but we need to see if it
|
||||
// unwinds all the way out of the cleanup or if it stays within it.
|
||||
const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
|
||||
const Value *UserUnwindParent;
|
||||
if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
|
||||
UserUnwindParent = CSI->getParentPad();
|
||||
else
|
||||
UserUnwindParent =
|
||||
cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
|
||||
|
||||
// The unwind stays within the cleanup iff it targets a child of the
|
||||
// cleanup.
|
||||
if (UserUnwindParent == Cleanup)
|
||||
continue;
|
||||
|
||||
// This unwind exits the cleanup, so its dest is the cleanup's dest.
|
||||
UnwindDest = UserUnwindDest;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Record the state of the unwind dest as the TryParentState.
|
||||
int UnwindDestState;
|
||||
|
||||
// If UnwindDest is null at this point, either the pad in question can
|
||||
// be exited by unwind to caller, or it cannot be exited by unwind. In
|
||||
// either case, reporting such cases as unwinding to caller is correct.
|
||||
// This can lead to EH tables that "look strange" -- if this pad is in
|
||||
// a parent funclet which has other children that do unwind to an enclosing
|
||||
// pad, the try region for this pad will be missing the "duplicate" EH
|
||||
// clause entries that you'd expect to see covering the whole parent. That
|
||||
// should be benign, since the unwind never actually happens. If it were
|
||||
// an issue, we could add a subsequent pass that pushes unwind dests down
|
||||
// from parents that have them to children that appear to unwind to caller.
|
||||
if (!UnwindDest) {
|
||||
UnwindDestState = -1;
|
||||
} else {
|
||||
UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
|
||||
}
|
||||
|
||||
Entry->TryParentState = UnwindDestState;
|
||||
}
|
||||
|
||||
// Step three: transfer information from pads to invokes.
|
||||
calculateStateNumbersForInvokes(Fn, FuncInfo);
|
||||
}
|
||||
|
||||
|
|
@ -597,6 +718,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
|
|||
for (auto &Funclets : FuncletBlocks) {
|
||||
BasicBlock *FuncletPadBB = Funclets.first;
|
||||
std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
|
||||
Value *FuncletToken;
|
||||
if (FuncletPadBB == &F.getEntryBlock())
|
||||
FuncletToken = ConstantTokenNone::get(F.getContext());
|
||||
else
|
||||
FuncletToken = FuncletPadBB->getFirstNonPHI();
|
||||
|
||||
std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
|
||||
ValueToValueMapTy VMap;
|
||||
|
|
@ -668,15 +794,44 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
|
|||
RemapInstruction(&I, VMap,
|
||||
RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
|
||||
|
||||
// Catchrets targeting cloned blocks need to be updated separately from
|
||||
// the loop above because they are not in the current funclet.
|
||||
SmallVector<CatchReturnInst *, 2> FixupCatchrets;
|
||||
for (auto &BBMapping : Orig2Clone) {
|
||||
BasicBlock *OldBlock = BBMapping.first;
|
||||
BasicBlock *NewBlock = BBMapping.second;
|
||||
|
||||
FixupCatchrets.clear();
|
||||
for (BasicBlock *Pred : predecessors(OldBlock))
|
||||
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
|
||||
if (CatchRet->getParentPad() == FuncletToken)
|
||||
FixupCatchrets.push_back(CatchRet);
|
||||
|
||||
for (CatchReturnInst *CatchRet : FixupCatchrets)
|
||||
CatchRet->setSuccessor(NewBlock);
|
||||
}
|
||||
|
||||
auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
|
||||
unsigned NumPreds = PN->getNumIncomingValues();
|
||||
for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
|
||||
++PredIdx) {
|
||||
BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
|
||||
ColorVector &IncomingColors = BlockColors[IncomingBlock];
|
||||
bool BlockInFunclet = IncomingColors.size() == 1 &&
|
||||
IncomingColors.front() == FuncletPadBB;
|
||||
if (IsForOldBlock != BlockInFunclet)
|
||||
bool EdgeTargetsFunclet;
|
||||
if (auto *CRI =
|
||||
dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
|
||||
EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken);
|
||||
} else {
|
||||
ColorVector &IncomingColors = BlockColors[IncomingBlock];
|
||||
assert(!IncomingColors.empty() && "Block not colored!");
|
||||
assert((IncomingColors.size() == 1 ||
|
||||
llvm::all_of(IncomingColors,
|
||||
[&](BasicBlock *Color) {
|
||||
return Color != FuncletPadBB;
|
||||
})) &&
|
||||
"Cloning should leave this funclet's blocks monochromatic");
|
||||
EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
|
||||
}
|
||||
if (IsForOldBlock != EdgeTargetsFunclet)
|
||||
continue;
|
||||
PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
|
||||
// Revisit the next entry.
|
||||
|
|
@ -864,7 +1019,6 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
|
|||
}
|
||||
|
||||
void WinEHPrepare::verifyPreparedFunclets(Function &F) {
|
||||
// Recolor the CFG to verify that all is well.
|
||||
for (BasicBlock &BB : F) {
|
||||
size_t NumColors = BlockColors[&BB].size();
|
||||
assert(NumColors == 1 && "Expected monochromatic BB!");
|
||||
|
|
@ -872,12 +1026,8 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
|
|||
report_fatal_error("Uncolored BB!");
|
||||
if (NumColors > 1)
|
||||
report_fatal_error("Multicolor BB!");
|
||||
if (!DisableDemotion) {
|
||||
bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
|
||||
assert(!EHPadHasPHI && "EH Pad still has a PHI!");
|
||||
if (EHPadHasPHI)
|
||||
report_fatal_error("EH Pad still has a PHI!");
|
||||
}
|
||||
assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
|
||||
"EH Pad still has a PHI!");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -896,12 +1046,17 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
|
|||
demotePHIsOnFunclets(F);
|
||||
|
||||
if (!DisableCleanups) {
|
||||
DEBUG(verifyFunction(F));
|
||||
removeImplausibleInstructions(F);
|
||||
|
||||
DEBUG(verifyFunction(F));
|
||||
cleanupPreparedFunclets(F);
|
||||
}
|
||||
|
||||
verifyPreparedFunclets(F);
|
||||
DEBUG(verifyPreparedFunclets(F));
|
||||
// Recolor the CFG to verify that all is well.
|
||||
DEBUG(colorFunclets(F));
|
||||
DEBUG(verifyPreparedFunclets(F));
|
||||
|
||||
BlockColors.clear();
|
||||
FuncletBlocks.clear();
|
||||
|
|
|
|||
|
|
@ -281,6 +281,7 @@ int FuzzerDriver(const std::vector<std::string> &Args,
|
|||
if (Flags.verbosity > 0 && !Dictionary.empty())
|
||||
Printf("Dictionary: %zd entries\n", Dictionary.size());
|
||||
Options.SaveArtifacts = !Flags.test_single_input;
|
||||
Options.PrintNewCovPcs = Flags.print_new_cov_pcs;
|
||||
|
||||
Fuzzer F(USF, Options);
|
||||
|
||||
|
|
|
|||
|
|
@ -72,3 +72,5 @@ FUZZER_FLAG_STRING(exact_artifact_path,
|
|||
FUZZER_FLAG_INT(drill, 0, "Experimental: fuzz using a single unit as the seed "
|
||||
"corpus, then merge with the initial corpus")
|
||||
FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.")
|
||||
FUZZER_FLAG_INT(print_new_cov_pcs, 0, "If 1, print out new covered pcs.")
|
||||
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ class Fuzzer {
|
|||
bool SaveArtifacts = true;
|
||||
bool PrintNEW = true; // Print a status line when new units are found;
|
||||
bool OutputCSV = false;
|
||||
bool PrintNewCovPcs = false;
|
||||
};
|
||||
Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
|
||||
void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
|
||||
|
|
@ -188,6 +189,7 @@ class Fuzzer {
|
|||
long EpochOfLastReadOfOutputCorpus = 0;
|
||||
size_t LastRecordedBlockCoverage = 0;
|
||||
size_t LastRecordedCallerCalleeCoverage = 0;
|
||||
size_t LastCoveragePcBufferLen = 0;
|
||||
};
|
||||
|
||||
class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ void __sanitizer_set_death_callback(void (*callback)(void));
|
|||
__attribute__((weak)) size_t __sanitizer_get_number_of_counters();
|
||||
__attribute__((weak))
|
||||
uintptr_t __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
|
||||
__attribute__((weak)) uintptr_t
|
||||
__sanitizer_get_coverage_pc_buffer(uintptr_t **data);
|
||||
}
|
||||
|
||||
namespace fuzzer {
|
||||
|
|
@ -249,7 +251,21 @@ void Fuzzer::ExecuteCallback(const Unit &U) {
|
|||
|
||||
size_t Fuzzer::RecordBlockCoverage() {
|
||||
CHECK_WEAK_API_FUNCTION(__sanitizer_get_total_unique_coverage);
|
||||
return LastRecordedBlockCoverage = __sanitizer_get_total_unique_coverage();
|
||||
uintptr_t PrevCoverage = LastRecordedBlockCoverage;
|
||||
LastRecordedBlockCoverage = __sanitizer_get_total_unique_coverage();
|
||||
|
||||
if (PrevCoverage == LastRecordedBlockCoverage || !Options.PrintNewCovPcs)
|
||||
return LastRecordedBlockCoverage;
|
||||
|
||||
uintptr_t PrevBufferLen = LastCoveragePcBufferLen;
|
||||
uintptr_t *CoverageBuf;
|
||||
LastCoveragePcBufferLen = __sanitizer_get_coverage_pc_buffer(&CoverageBuf);
|
||||
assert(CoverageBuf);
|
||||
for (size_t i = PrevBufferLen; i < LastCoveragePcBufferLen; ++i) {
|
||||
Printf("0x%x\n", CoverageBuf[i]);
|
||||
}
|
||||
|
||||
return LastRecordedBlockCoverage;
|
||||
}
|
||||
|
||||
size_t Fuzzer::RecordCallerCalleeCoverage() {
|
||||
|
|
|
|||
|
|
@ -117,11 +117,18 @@ size_t MutationDispatcher::Mutate_AddWordFromDictionary(uint8_t *Data,
|
|||
assert(!D.empty());
|
||||
if (D.empty()) return 0;
|
||||
const Unit &Word = D[Rand(D.size())];
|
||||
if (Size + Word.size() > MaxSize) return 0;
|
||||
size_t Idx = Rand(Size + 1);
|
||||
memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx);
|
||||
memcpy(Data + Idx, Word.data(), Word.size());
|
||||
return Size + Word.size();
|
||||
if (Rand.RandBool()) { // Insert Word.
|
||||
if (Size + Word.size() > MaxSize) return 0;
|
||||
size_t Idx = Rand(Size + 1);
|
||||
memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx);
|
||||
memcpy(Data + Idx, Word.data(), Word.size());
|
||||
return Size + Word.size();
|
||||
} else { // Overwrite some bytes with Word.
|
||||
if (Word.size() > Size) return 0;
|
||||
size_t Idx = Rand(Size - Word.size());
|
||||
memcpy(Data + Idx, Word.data(), Word.size());
|
||||
return Size;
|
||||
}
|
||||
}
|
||||
|
||||
size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size,
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
|
||||
#if !LLVM_FUZZER_SUPPORTS_DFSAN
|
||||
|
|
@ -172,8 +173,13 @@ struct TraceBasedMutation {
|
|||
|
||||
class TraceState {
|
||||
public:
|
||||
TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
|
||||
: Options(Options), CurrentUnit(CurrentUnit) {}
|
||||
TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
|
||||
: Options(Options), CurrentUnit(CurrentUnit) {
|
||||
// Current trace collection is not thread-friendly and it probably
|
||||
// does not have to be such, but at least we should not crash in presence
|
||||
// of threads. So, just ignore all traces coming from all threads but one.
|
||||
IsMyThread = true;
|
||||
}
|
||||
|
||||
LabelRange GetLabelRange(dfsan_label L);
|
||||
void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
|
||||
|
|
@ -213,8 +219,11 @@ class TraceState {
|
|||
LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];
|
||||
const Fuzzer::FuzzingOptions &Options;
|
||||
const Unit &CurrentUnit;
|
||||
static thread_local bool IsMyThread;
|
||||
};
|
||||
|
||||
thread_local bool TraceState::IsMyThread;
|
||||
|
||||
LabelRange TraceState::GetLabelRange(dfsan_label L) {
|
||||
LabelRange &LR = LabelRanges[L];
|
||||
if (LR.Beg < LR.End || L == 0)
|
||||
|
|
@ -238,7 +247,7 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
|
|||
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
|
||||
dfsan_label L2) {
|
||||
assert(ReallyHaveDFSan());
|
||||
if (!RecordingTraces) return;
|
||||
if (!RecordingTraces || !IsMyThread) return;
|
||||
if (L1 == 0 && L2 == 0)
|
||||
return; // Not actionable.
|
||||
if (L1 != 0 && L2 != 0)
|
||||
|
|
@ -267,7 +276,7 @@ void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
|
|||
uint64_t Val, size_t NumCases,
|
||||
uint64_t *Cases, dfsan_label L) {
|
||||
assert(ReallyHaveDFSan());
|
||||
if (!RecordingTraces) return;
|
||||
if (!RecordingTraces || !IsMyThread) return;
|
||||
if (!L) return; // Not actionable.
|
||||
LabelRange LR = GetLabelRange(L);
|
||||
size_t ValSize = ValSizeInBits / 8;
|
||||
|
|
@ -312,7 +321,7 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
|
|||
|
||||
void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
|
||||
uint64_t Arg1, uint64_t Arg2) {
|
||||
if (!RecordingTraces) return;
|
||||
if (!RecordingTraces || !IsMyThread) return;
|
||||
int Added = 0;
|
||||
if (Options.Verbosity >= 3)
|
||||
Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2);
|
||||
|
|
@ -327,7 +336,7 @@ void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
|
|||
void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
|
||||
uint64_t Val, size_t NumCases,
|
||||
uint64_t *Cases) {
|
||||
if (!RecordingTraces) return;
|
||||
if (!RecordingTraces || !IsMyThread) return;
|
||||
size_t ValSize = ValSizeInBits / 8;
|
||||
bool TryShort = IsTwoByteData(Val);
|
||||
for (size_t i = 0; i < NumCases; i++)
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ set(Tests
|
|||
StrcmpTest
|
||||
StrncmpTest
|
||||
SwitchTest
|
||||
ThreadedTest
|
||||
TimeoutTest
|
||||
)
|
||||
|
||||
|
|
|
|||
23
lib/Fuzzer/test/ThreadedTest.cpp
Normal file
23
lib/Fuzzer/test/ThreadedTest.cpp
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
// Threaded test for a fuzzer. The fuzzer should not crash.
|
||||
#include <assert.h>
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <thread>
|
||||
|
||||
// Fuzz target entry point. For inputs of at least 8 bytes it runs the same
// memcmp loop on six threads at once, joins them, and always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
// Inputs shorter than 8 bytes are ignored.
if (Size < 8) return 0;
|
||||
assert(Data);
|
||||
// Worker body: compares the first 4 bytes of Data with the 4 bytes starting
// at Data + Size / 2, Size / 2 times, and accumulates the memcmp results.
// NOTE(review): presumably the memcmp calls exist to drive the fuzzer's
// comparison tracing from several threads (the accompanying .test runs with
// -use_traces=1) -- confirm.
auto C = [&] {
|
||||
size_t Res = 0;
|
||||
for (size_t i = 0; i < Size / 2; i++)
|
||||
Res += memcmp(Data, Data + Size / 2, 4);
|
||||
return Res;
|
||||
};
|
||||
// Start six threads that all run the worker.
std::thread T[] = {std::thread(C), std::thread(C), std::thread(C),
|
||||
std::thread(C), std::thread(C), std::thread(C)};
|
||||
// Wait for every worker before returning; the lambda captures Data and Size
// by reference.
for (auto &X : T)
|
||||
X.join();
|
||||
return 0;
|
||||
}
|
||||
|
||||
7
lib/Fuzzer/test/fuzzer-threaded.test
Normal file
7
lib/Fuzzer/test/fuzzer-threaded.test
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
CHECK: Done 1000 runs in
|
||||
|
||||
RUN: LLVMFuzzer-ThreadedTest -use_traces=1 -runs=1000 2>&1 | FileCheck %s
|
||||
RUN: LLVMFuzzer-ThreadedTest -use_traces=1 -runs=1000 2>&1 | FileCheck %s
|
||||
RUN: LLVMFuzzer-ThreadedTest -use_traces=1 -runs=1000 2>&1 | FileCheck %s
|
||||
RUN: LLVMFuzzer-ThreadedTest -use_traces=1 -runs=1000 2>&1 | FileCheck %s
|
||||
|
||||
|
|
@ -30,3 +30,9 @@ RUN: LLVMFuzzer-SimpleDictionaryTest -seed=1 -runs=100000
|
|||
|
||||
RUN: not LLVMFuzzer-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
|
||||
UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
|
||||
|
||||
RUN: LLVMFuzzer-SimpleTest -print_new_cov_pcs=1 2>&1 | FileCheck %s --check-prefix=PCS
|
||||
PCS:{{^0x[a-f0-9]+}}
|
||||
PCS:NEW
|
||||
PCS:BINGO
|
||||
|
||||
|
|
|
|||
|
|
@ -2060,7 +2060,7 @@ private:
|
|||
|
||||
// printGCRelocateComment - print comment after call to the gc.relocate
|
||||
// intrinsic indicating base and derived pointer names.
|
||||
void printGCRelocateComment(const Value &V);
|
||||
void printGCRelocateComment(const GCRelocateInst &Relocate);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
|
@ -2722,14 +2722,11 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) {
|
|||
|
||||
/// printGCRelocateComment - print comment after call to the gc.relocate
|
||||
/// intrinsic indicating base and derived pointer names.
|
||||
void AssemblyWriter::printGCRelocateComment(const Value &V) {
|
||||
assert(isGCRelocate(&V));
|
||||
GCRelocateOperands GCOps(cast<Instruction>(&V));
|
||||
|
||||
void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
|
||||
Out << " ; (";
|
||||
writeOperand(GCOps.getBasePtr(), false);
|
||||
writeOperand(Relocate.getBasePtr(), false);
|
||||
Out << ", ";
|
||||
writeOperand(GCOps.getDerivedPtr(), false);
|
||||
writeOperand(Relocate.getDerivedPtr(), false);
|
||||
Out << ")";
|
||||
}
|
||||
|
||||
|
|
@ -2737,8 +2734,8 @@ void AssemblyWriter::printGCRelocateComment(const Value &V) {
|
|||
/// which slot it occupies.
|
||||
///
|
||||
void AssemblyWriter::printInfoComment(const Value &V) {
|
||||
if (isGCRelocate(&V))
|
||||
printGCRelocateComment(V);
|
||||
if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
|
||||
printGCRelocateComment(*Relocate);
|
||||
|
||||
if (AnnotationWriter)
|
||||
AnnotationWriter->printInfoComment(V, Out);
|
||||
|
|
|
|||
|
|
@ -641,14 +641,15 @@ AttributeSet AttributeSet::get(LLVMContext &C,
|
|||
if (Attrs.empty())
|
||||
return AttributeSet();
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
|
||||
assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
|
||||
"Misordered Attributes list!");
|
||||
assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
|
||||
"Pointless attribute!");
|
||||
}
|
||||
#endif
|
||||
assert(std::is_sorted(Attrs.begin(), Attrs.end(),
|
||||
[](const std::pair<unsigned, Attribute> &LHS,
|
||||
const std::pair<unsigned, Attribute> &RHS) {
|
||||
return LHS.first < RHS.first;
|
||||
}) && "Misordered Attributes list!");
|
||||
assert(std::none_of(Attrs.begin(), Attrs.end(),
|
||||
[](const std::pair<unsigned, Attribute> &Pair) {
|
||||
return Pair.second.hasAttribute(Attribute::None);
|
||||
}) && "Pointless attribute!");
|
||||
|
||||
// Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes
|
||||
// list.
|
||||
|
|
|
|||
|
|
@ -76,22 +76,21 @@ iplist<Instruction>::iterator Instruction::eraseFromParent() {
|
|||
return getParent()->getInstList().erase(getIterator());
|
||||
}
|
||||
|
||||
/// insertBefore - Insert an unlinked instructions into a basic block
|
||||
/// immediately before the specified instruction.
|
||||
/// Insert an unlinked instruction into a basic block immediately before the
|
||||
/// specified instruction.
|
||||
void Instruction::insertBefore(Instruction *InsertPos) {
|
||||
// Insert `this` into the instruction list of InsertPos's parent block, at
// InsertPos's position.
InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
|
||||
}
|
||||
|
||||
/// insertAfter - Insert an unlinked instructions into a basic block
|
||||
/// immediately after the specified instruction.
|
||||
/// Insert an unlinked instruction into a basic block immediately after the
|
||||
/// specified instruction.
|
||||
void Instruction::insertAfter(Instruction *InsertPos) {
|
||||
// Insert `this` into the instruction list of InsertPos's parent block,
// after InsertPos.
InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(),
|
||||
this);
|
||||
}
|
||||
|
||||
/// moveBefore - Unlink this instruction from its current basic block and
|
||||
/// insert it into the basic block that MovePos lives in, right before
|
||||
/// MovePos.
|
||||
/// Unlink this instruction from its current basic block and insert it into the
|
||||
/// basic block that MovePos lives in, right before MovePos.
|
||||
void Instruction::moveBefore(Instruction *MovePos) {
|
||||
MovePos->getParent()->getInstList().splice(
|
||||
MovePos->getIterator(), getParent()->getInstList(), getIterator());
|
||||
|
|
|
|||
|
|
@ -609,20 +609,6 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
|
|||
return setSuccessor(idx, B);
|
||||
}
|
||||
|
||||
bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
|
||||
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
|
||||
return true;
|
||||
|
||||
// Operand bundles override attributes on the called function, but don't
|
||||
// override attributes directly present on the invoke instruction.
|
||||
if (isFnAttrDisallowedByOpBundle(A))
|
||||
return false;
|
||||
|
||||
if (const Function *F = getCalledFunction())
|
||||
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
|
||||
assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
|
||||
|
||||
|
|
@ -934,6 +920,17 @@ void CatchSwitchInst::addHandler(BasicBlock *Handler) {
|
|||
getOperandList()[OpNo] = Handler;
|
||||
}
|
||||
|
||||
// Removes the handler referenced by HI from this catchswitch's operand list:
// every operand after HI is shifted down one slot, the last slot is cleared,
// and the operand count is reduced by one.
void CatchSwitchInst::removeHandler(handler_iterator HI) {
|
||||
// Move all subsequent handlers up one.
|
||||
// EndDst is the last operand slot; the loop stops there because nothing
// follows it to copy from.
Use *EndDst = op_end() - 1;
|
||||
for (Use *CurDst = HI.getCurrent(); CurDst != EndDst; ++CurDst)
|
||||
*CurDst = *(CurDst + 1);
|
||||
// Null out the last handler use.
|
||||
*EndDst = nullptr;
|
||||
|
||||
// Shrink the operand count so the cleared trailing slot is no longer counted.
setNumHungOffUseOperands(getNumOperands() - 1);
|
||||
}
|
||||
|
||||
BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
|
||||
return getSuccessor(idx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -190,6 +190,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
|
|||
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
|
||||
assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) &&
|
||||
"Expected non-temp node");
|
||||
assert(CanReplace &&
|
||||
"Attempted to replace Metadata marked for no replacement");
|
||||
|
||||
if (UseMap.empty())
|
||||
return;
|
||||
|
|
@ -555,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
|
|||
resolve();
|
||||
}
|
||||
|
||||
void MDNode::resolveCycles(bool MDMaterialized) {
|
||||
void MDNode::resolveCycles(bool AllowTemps) {
|
||||
if (isResolved())
|
||||
return;
|
||||
|
||||
|
|
@ -568,7 +570,7 @@ void MDNode::resolveCycles(bool MDMaterialized) {
|
|||
if (!N)
|
||||
continue;
|
||||
|
||||
if (N->isTemporary() && !MDMaterialized)
|
||||
if (N->isTemporary() && AllowTemps)
|
||||
continue;
|
||||
assert(!N->isTemporary() &&
|
||||
"Expected all forward declarations to be resolved");
|
||||
|
|
|
|||
|
|
@ -40,20 +40,7 @@ bool llvm::isStatepoint(const Value &inst) {
|
|||
}
|
||||
|
||||
bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
|
||||
if (!CS.getInstruction()) {
|
||||
// This is not a call site
|
||||
return false;
|
||||
}
|
||||
|
||||
return isGCRelocate(CS.getInstruction());
|
||||
}
|
||||
bool llvm::isGCRelocate(const Value *inst) {
|
||||
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
|
||||
if (const Function *F = call->getCalledFunction()) {
|
||||
return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
|
||||
}
|
||||
|
||||
bool llvm::isGCResult(const ImmutableCallSite &CS) {
|
||||
|
|
|
|||
|
|
@ -1657,14 +1657,14 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
|
|||
const CallInst *Call = dyn_cast<const CallInst>(U);
|
||||
Assert(Call, "illegal use of statepoint token", &CI, U);
|
||||
if (!Call) continue;
|
||||
Assert(isGCRelocate(Call) || isGCResult(Call),
|
||||
Assert(isa<GCRelocateInst>(Call) || isGCResult(Call),
|
||||
"gc.result or gc.relocate are the only value uses"
|
||||
"of a gc.statepoint",
|
||||
&CI, U);
|
||||
if (isGCResult(Call)) {
|
||||
Assert(Call->getArgOperand(0) == &CI,
|
||||
"gc.result connected to wrong gc.statepoint", &CI, Call);
|
||||
} else if (isGCRelocate(Call)) {
|
||||
} else if (isa<GCRelocateInst>(Call)) {
|
||||
Assert(Call->getArgOperand(0) == &CI,
|
||||
"gc.relocate connected to wrong gc.statepoint", &CI, Call);
|
||||
}
|
||||
|
|
@ -3019,8 +3019,7 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
|
|||
&CPI);
|
||||
|
||||
auto *ParentPad = CPI.getParentPad();
|
||||
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
|
||||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
|
||||
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
|
||||
"CleanupPadInst has an invalid parent.", &CPI);
|
||||
|
||||
User *FirstUser = nullptr;
|
||||
|
|
@ -3077,10 +3076,17 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
|
|||
}
|
||||
|
||||
auto *ParentPad = CatchSwitch.getParentPad();
|
||||
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
|
||||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
|
||||
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
|
||||
"CatchSwitchInst has an invalid parent.", ParentPad);
|
||||
|
||||
Assert(CatchSwitch.getNumHandlers() != 0,
|
||||
"CatchSwitchInst cannot have empty handler list", &CatchSwitch);
|
||||
|
||||
for (BasicBlock *Handler : CatchSwitch.handlers()) {
|
||||
Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()),
|
||||
"CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
|
||||
}
|
||||
|
||||
visitTerminatorInst(CatchSwitch);
|
||||
}
|
||||
|
||||
|
|
@ -3675,8 +3681,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
|
|||
|
||||
// Verify rest of the relocate arguments
|
||||
|
||||
GCRelocateOperands Ops(CS);
|
||||
ImmutableCallSite StatepointCS(Ops.getStatepoint());
|
||||
ImmutableCallSite StatepointCS(
|
||||
cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
|
||||
|
||||
// Both the base and derived must be piped through the safepoint
|
||||
Value* Base = CS.getArgOperand(1);
|
||||
|
|
@ -3731,14 +3737,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
|
|||
// Relocated value must be a pointer type, but gc_relocate does not need to return the
|
||||
// same pointer type as the relocated pointer. It can be casted to the correct type later
|
||||
// if it's desired. However, they must have the same address space.
|
||||
GCRelocateOperands Operands(CS);
|
||||
Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
|
||||
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
|
||||
Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
|
||||
"gc.relocate: relocated value must be a gc pointer", CS);
|
||||
|
||||
// gc_relocate return type must be a pointer type, and is verified earlier in
|
||||
// VerifyIntrinsicType().
|
||||
Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
|
||||
cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
|
||||
cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
|
||||
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -524,6 +524,23 @@ public:
|
|||
ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
|
||||
}
|
||||
|
||||
/// Restore the CanReplace flag on the temporary metadata held as keys in
/// MetadataToIDs.
///
/// When metadata is not being linked, the flag is cleared on every temporary
/// node in the map so that none can be replaced while it serves as a map
/// key. The map is about to be destroyed, so the flag is turned back on to
/// let the nodes be replaced during the later metadata-linking step.
~IRLinker() {
  // Nothing was locked down if metadata is being linked.
  if (shouldLinkMetadata())
    return;

  for (const auto &Entry : MetadataToIDs) {
    auto *Node = dyn_cast<MDNode>(const_cast<Metadata *>(Entry.first));
    assert((Node && Node->isTemporary()) &&
           "Found non-temp metadata in map when not linking metadata");
    Node->setCanReplace(true);
  }
}
|
||||
|
||||
bool run();
|
||||
Value *materializeDeclFor(Value *V, bool ForAlias);
|
||||
void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
|
||||
|
|
@ -1111,7 +1128,8 @@ bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
|
|||
// a function and before remapping metadata on instructions below
|
||||
// in RemapInstruction, as the saved mapping is used to handle
|
||||
// the temporary metadata hanging off instructions.
|
||||
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
|
||||
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
|
||||
/* OnlyTempMD = */ true);
|
||||
|
||||
// Link in the prefix data.
|
||||
if (Src.hasPrefixData())
|
||||
|
|
@ -1514,7 +1532,8 @@ bool IRLinker::run() {
|
|||
// Ensure metadata materialized
|
||||
if (SrcM.getMaterializer()->materializeMetadata())
|
||||
return true;
|
||||
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
|
||||
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
|
||||
/* OnlyTempMD = */ false);
|
||||
}
|
||||
|
||||
linkNamedMDNodes();
|
||||
|
|
|
|||
|
|
@ -514,13 +514,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
|
|||
MCOS->EmitULEB128IntValue(1);
|
||||
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
|
||||
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
|
||||
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list,
|
||||
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
|
||||
: dwarf::DW_FORM_data4);
|
||||
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, context.getDwarfVersion() >= 4
|
||||
? dwarf::DW_FORM_sec_offset
|
||||
: dwarf::DW_FORM_data4);
|
||||
if (context.getGenDwarfSectionSyms().size() > 1 &&
|
||||
context.getDwarfVersion() >= 3) {
|
||||
EmitAbbrev(MCOS, dwarf::DW_AT_ranges,
|
||||
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
|
||||
EmitAbbrev(MCOS, dwarf::DW_AT_ranges, context.getDwarfVersion() >= 4
|
||||
? dwarf::DW_FORM_sec_offset
|
||||
: dwarf::DW_FORM_data4);
|
||||
} else {
|
||||
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
//===-- MObjectFileInfo.cpp - Object File Information ---------------------===//
|
||||
//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
|
|
|||
|
|
@ -63,31 +63,30 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
|
|||
/// ToggleFeature - Toggle a feature and returns the re-computed feature
|
||||
/// bits. This version will also change all implied bits.
|
||||
FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
|
||||
SubtargetFeatures Features;
|
||||
FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
|
||||
SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
|
||||
return FeatureBits;
|
||||
}
|
||||
|
||||
FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
|
||||
SubtargetFeatures Features;
|
||||
FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
|
||||
SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
|
||||
return FeatureBits;
|
||||
}
|
||||
|
||||
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
|
||||
assert(ProcSchedModels && "Processor machine model not available!");
|
||||
|
||||
size_t NumProcs = ProcDesc.size();
|
||||
assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs,
|
||||
ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
|
||||
|
||||
assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
|
||||
[](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
|
||||
return strcmp(LHS.Key, RHS.Key) < 0;
|
||||
}) &&
|
||||
"Processor machine model table is not sorted");
|
||||
|
||||
// Find entry
|
||||
const SubtargetInfoKV *Found =
|
||||
std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
|
||||
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
|
||||
auto Found =
|
||||
std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
|
||||
if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
|
||||
if (CPU != "help") // Don't error if the user asked for help.
|
||||
errs() << "'" << CPU
|
||||
<< "' is not a recognized processor for this target"
|
||||
|
|
|
|||
|
|
@ -160,10 +160,9 @@ void ClearImpliedBits(FeatureBitset &Bits,
|
|||
}
|
||||
}
|
||||
|
||||
/// ToggleFeature - Toggle a feature and returns the newly updated feature
|
||||
/// bits.
|
||||
FeatureBitset
|
||||
SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
|
||||
/// ToggleFeature - Toggle a feature and update the feature bits.
|
||||
void
|
||||
SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable) {
|
||||
|
||||
// Find feature in table.
|
||||
|
|
@ -186,12 +185,9 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
|
|||
<< "' is not a recognized feature for this target"
|
||||
<< " (ignoring feature)\n";
|
||||
}
|
||||
|
||||
return Bits;
|
||||
}
|
||||
|
||||
FeatureBitset
|
||||
SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
|
||||
void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
|
||||
ArrayRef<SubtargetFeatureKV> FeatureTable) {
|
||||
|
||||
assert(hasFlag(Feature));
|
||||
|
|
@ -203,7 +199,7 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
|
|||
if (FeatureEntry) {
|
||||
// Enable/disable feature in bits
|
||||
if (isEnabled(Feature)) {
|
||||
Bits |= FeatureEntry->Value;
|
||||
Bits |= FeatureEntry->Value;
|
||||
|
||||
// For each feature that this implies, set it.
|
||||
SetImpliedBits(Bits, FeatureEntry, FeatureTable);
|
||||
|
|
@ -218,8 +214,6 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
|
|||
<< "' is not a recognized feature for this target"
|
||||
<< " (ignoring feature)\n";
|
||||
}
|
||||
|
||||
return Bits;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -234,14 +228,10 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
|
|||
return FeatureBitset();
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
|
||||
assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
|
||||
"CPU table is not sorted");
|
||||
}
|
||||
for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
|
||||
assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
|
||||
"CPU features table is not sorted");
|
||||
}
|
||||
assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
|
||||
"CPU table is not sorted");
|
||||
assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
|
||||
"CPU features table is not sorted");
|
||||
#endif
|
||||
// Resulting bits
|
||||
FeatureBitset Bits;
|
||||
|
|
@ -277,7 +267,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
|
|||
if (Feature == "+help")
|
||||
Help(CPUTable, FeatureTable);
|
||||
|
||||
Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable);
|
||||
ApplyFeatureFlag(Bits, Feature, FeatureTable);
|
||||
}
|
||||
|
||||
return Bits;
|
||||
|
|
|
|||
|
|
@ -316,12 +316,17 @@ static std::error_code readCoverageMappingData(
|
|||
|
||||
// Read the records in the coverage data section.
|
||||
for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
|
||||
if (Buf + 4 * sizeof(uint32_t) > End)
|
||||
if (Buf + sizeof(CovMapHeader) > End)
|
||||
return coveragemap_error::malformed;
|
||||
uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
|
||||
uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
|
||||
uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
|
||||
uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
|
||||
auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
|
||||
uint32_t NRecords =
|
||||
endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
|
||||
uint32_t FilenamesSize =
|
||||
endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
|
||||
uint32_t CoverageSize =
|
||||
endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
|
||||
uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
|
||||
Buf = reinterpret_cast<const char *>(++CovHeader);
|
||||
|
||||
switch (Version) {
|
||||
case CoverageMappingVersion1:
|
||||
|
|
|
|||
|
|
@ -12,12 +12,15 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/LEB128.h"
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
|
@ -162,6 +165,98 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
|
|||
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
|
||||
}
|
||||
|
||||
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
|
||||
bool doCompression, std::string &Result) {
|
||||
uint8_t Header[16], *P = Header;
|
||||
std::string UncompressedNameStrings =
|
||||
join(NameStrs.begin(), NameStrs.end(), StringRef(" "));
|
||||
|
||||
unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
|
||||
P += EncLen;
|
||||
|
||||
auto WriteStringToResult = [&](size_t CompressedLen,
|
||||
const std::string &InputStr) {
|
||||
EncLen = encodeULEB128(CompressedLen, P);
|
||||
P += EncLen;
|
||||
char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
|
||||
unsigned HeaderLen = P - &Header[0];
|
||||
Result.append(HeaderStr, HeaderLen);
|
||||
Result += InputStr;
|
||||
return 0;
|
||||
};
|
||||
|
||||
if (!doCompression)
|
||||
return WriteStringToResult(0, UncompressedNameStrings);
|
||||
|
||||
SmallVector<char, 128> CompressedNameStrings;
|
||||
zlib::Status Success =
|
||||
zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
|
||||
zlib::BestSizeCompression);
|
||||
|
||||
if (Success != zlib::StatusOK)
|
||||
return 1;
|
||||
|
||||
return WriteStringToResult(
|
||||
CompressedNameStrings.size(),
|
||||
std::string(CompressedNameStrings.data(), CompressedNameStrings.size()));
|
||||
}
|
||||
|
||||
/// Return the function name stored in the initializer of \p NameVar.
///
/// The initializer must be a ConstantDataArray. Its C-string view is used
/// when the array is a C string, and its plain string view otherwise.
StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) {
  auto *Init = cast<ConstantDataArray>(NameVar->getInitializer());
  if (Init->isCString())
    return Init->getAsCString();
  return Init->getAsString();
}
|
||||
|
||||
/// Collect the names held by the given name variables and serialize them
/// into \p Result, compressing whenever zlib is available.
///
/// \returns whatever the string-based overload returns.
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
                              std::string &Result) {
  std::vector<std::string> Names;
  Names.reserve(NameVars.size());
  for (GlobalVariable *Var : NameVars)
    Names.push_back(getPGOFuncNameInitializer(Var).str());

  const bool Compress = zlib::isAvailable();
  return collectPGOFuncNameStrings(Names, Compress, Result);
}
|
||||
|
||||
/// Parse a blob of serialized function-name records and add every name to
/// \p Symtab.
///
/// Each record is
///   ULEB128(uncompressed length) ULEB128(compressed length) payload
/// where a compressed length of 0 means the payload is stored uncompressed.
/// Names inside a payload are separated by a single space. Zero bytes
/// following a record are skipped as padding.
///
/// \returns 0 on success; 1 if a record's payload runs past the end of the
/// input or if decompression fails. On failure the symtab is not finalized.
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
  const uint8_t *EndP = P + NameStrings.size();
  while (P < EndP) {
    uint32_t N;
    uint64_t UncompressedSize = decodeULEB128(P, &N);
    P += N;
    uint64_t CompressedSize = decodeULEB128(P, &N);
    P += N;
    bool isCompressed = (CompressedSize != 0);

    // Reject truncated or corrupt records before touching the payload: the
    // stored length must fit in what is left of the input.
    // NOTE(review): the ULEB128 decoding above is itself not bounded by EndP.
    if (P > EndP)
      return 1;
    const uint64_t StoredSize = isCompressed ? CompressedSize : UncompressedSize;
    if (StoredSize > static_cast<uint64_t>(EndP - P))
      return 1;

    SmallString<128> UncompressedNameStrings;
    StringRef RecordNames;
    if (isCompressed) {
      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
                                      CompressedSize);
      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
                           UncompressedSize) != zlib::StatusOK)
        return 1;
      P += CompressedSize;
      RecordNames = StringRef(UncompressedNameStrings.data(),
                              UncompressedNameStrings.size());
    } else {
      RecordNames =
          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
      P += UncompressedSize;
    }

    // Now parse the name strings.
    SmallVector<StringRef, 0> Names;
    RecordNames.split(Names, ' ');
    for (StringRef &Name : Names)
      Symtab.addFuncName(Name);

    // Skip zero padding between records.
    while (P < EndP && *P == 0)
      P++;
  }
  Symtab.finalizeSymtab();
  return 0;
}
|
||||
|
||||
instrprof_error
|
||||
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
|
||||
uint64_t Weight) {
|
||||
|
|
|
|||
|
|
@ -446,7 +446,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
|
|||
return EC;
|
||||
}
|
||||
|
||||
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
|
||||
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
|
||||
static long ArgMax = sysconf(_SC_ARG_MAX);
|
||||
|
||||
// System says no practical limit.
|
||||
|
|
@ -456,7 +456,7 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
|
|||
// Conservatively account for space required by environment variables.
|
||||
long HalfArgMax = ArgMax / 2;
|
||||
|
||||
size_t ArgLength = 0;
|
||||
size_t ArgLength = Program.size() + 1;
|
||||
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
|
||||
I != E; ++I) {
|
||||
ArgLength += strlen(*I) + 1;
|
||||
|
|
|
|||
|
|
@ -535,14 +535,15 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
|
|||
return EC;
|
||||
}
|
||||
|
||||
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
|
||||
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
|
||||
// The documented max length of the command line passed to CreateProcess.
|
||||
static const size_t MaxCommandStringLength = 32768;
|
||||
size_t ArgLength = 0;
|
||||
// Account for the trailing space for the program path and the
|
||||
// trailing NULL of the last argument.
|
||||
size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
|
||||
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
|
||||
I != E; ++I) {
|
||||
// Account for the trailing space for every arg but the last one and the
|
||||
// trailing NULL of the last argument.
|
||||
// Account for the trailing space for every arg
|
||||
ArgLength += ArgLenWithQuotes(*I) + 1;
|
||||
if (ArgLength > MaxCommandStringLength) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@
|
|||
#define _WIN32_WINNT 0x0601
|
||||
#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
|
|
@ -44,6 +47,21 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#if !defined(__CYGWIN__) && !defined(__MINGW32__)
|
||||
#include <VersionHelpers.h>
|
||||
#else
|
||||
// Cygwin does not have the IsWindows8OrGreater() API.
|
||||
// Some versions of MinGW do not have the API either.
|
||||
inline bool IsWindows8OrGreater() {
|
||||
OSVERSIONINFO osvi = {};
|
||||
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
|
||||
if (!::GetVersionEx(&osvi))
|
||||
return false;
|
||||
return (osvi.dwMajorVersion > 6 ||
|
||||
(osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
|
||||
}
|
||||
#endif // __CYGWIN__
|
||||
|
||||
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
|
||||
if (!ErrMsg)
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#include "Windows/WindowsSupport.h"
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
raw_ostream::~raw_ostream() {
|
||||
|
|
@ -567,8 +571,21 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
|
|||
assert(FD >= 0 && "File already closed.");
|
||||
pos += Size;
|
||||
|
||||
#ifndef LLVM_ON_WIN32
|
||||
bool ShouldWriteInChunks = false;
|
||||
#else
|
||||
// Writing a large size of output to Windows console returns ENOMEM. It seems
|
||||
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
|
||||
// the latter has a size limit (66000 bytes or less, depending on heap usage).
|
||||
bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
|
||||
#endif
|
||||
|
||||
do {
|
||||
ssize_t ret = ::write(FD, Ptr, Size);
|
||||
size_t ChunkSize = Size;
|
||||
if (ChunkSize > 32767 && ShouldWriteInChunks)
|
||||
ChunkSize = 32767;
|
||||
|
||||
ssize_t ret = ::write(FD, Ptr, ChunkSize);
|
||||
|
||||
if (ret < 0) {
|
||||
// If it's a recoverable error, swallow it and retry the write.
|
||||
|
|
|
|||
|
|
@ -722,7 +722,7 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
|
|||
|
||||
std::string UnOpInit::getAsString() const {
|
||||
std::string Result;
|
||||
switch (Opc) {
|
||||
switch (getOpcode()) {
|
||||
case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
|
||||
case HEAD: Result = "!head"; break;
|
||||
case TAIL: Result = "!tail"; break;
|
||||
|
|
@ -850,7 +850,7 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
|
|||
|
||||
std::string BinOpInit::getAsString() const {
|
||||
std::string Result;
|
||||
switch (Opc) {
|
||||
switch (getOpcode()) {
|
||||
case CONCAT: Result = "!con"; break;
|
||||
case ADD: Result = "!add"; break;
|
||||
case AND: Result = "!and"; break;
|
||||
|
|
@ -1054,7 +1054,7 @@ Init *TernOpInit::resolveReferences(Record &R,
|
|||
const RecordVal *RV) const {
|
||||
Init *lhs = LHS->resolveReferences(R, RV);
|
||||
|
||||
if (Opc == IF && lhs != LHS) {
|
||||
if (getOpcode() == IF && lhs != LHS) {
|
||||
IntInit *Value = dyn_cast<IntInit>(lhs);
|
||||
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
|
||||
Value = dyn_cast<IntInit>(I);
|
||||
|
|
@ -1082,7 +1082,7 @@ Init *TernOpInit::resolveReferences(Record &R,
|
|||
|
||||
std::string TernOpInit::getAsString() const {
|
||||
std::string Result;
|
||||
switch (Opc) {
|
||||
switch (getOpcode()) {
|
||||
case SUBST: Result = "!subst"; break;
|
||||
case FOREACH: Result = "!foreach"; break;
|
||||
case IF: Result = "!if"; break;
|
||||
|
|
|
|||
|
|
@ -77,7 +77,8 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
|
|||
/// SetValue -
|
||||
/// Return true on error, false on success.
|
||||
bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
|
||||
const std::vector<unsigned> &BitList, Init *V) {
|
||||
ArrayRef<unsigned> BitList, Init *V,
|
||||
bool AllowSelfAssignment) {
|
||||
if (!V) return false;
|
||||
|
||||
if (!CurRec) CurRec = &CurMultiClass->Rec;
|
||||
|
|
@ -91,8 +92,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
|
|||
// in the resolution machinery.
|
||||
if (BitList.empty())
|
||||
if (VarInit *VI = dyn_cast<VarInit>(V))
|
||||
if (VI->getNameInit() == ValName)
|
||||
return false;
|
||||
if (VI->getNameInit() == ValName && !AllowSelfAssignment)
|
||||
return true;
|
||||
|
||||
// If we are assigning to a subset of the bits in the value... then we must be
|
||||
// assigning to a field of BitsRecTy, which must have a BitsInit
|
||||
|
|
@ -165,7 +166,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
|
|||
if (i < SubClass.TemplateArgs.size()) {
|
||||
// If a value is specified for this template arg, set it now.
|
||||
if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
|
||||
std::vector<unsigned>(), SubClass.TemplateArgs[i]))
|
||||
None, SubClass.TemplateArgs[i]))
|
||||
return true;
|
||||
|
||||
// Resolve it next.
|
||||
|
|
@ -243,8 +244,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
|
|||
// If a value is specified for this template arg, set it in the
|
||||
// superclass now.
|
||||
if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
|
||||
std::vector<unsigned>(),
|
||||
SubMultiClass.TemplateArgs[i]))
|
||||
None, SubMultiClass.TemplateArgs[i]))
|
||||
return true;
|
||||
|
||||
// Resolve it next.
|
||||
|
|
@ -258,8 +258,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
|
|||
for (const auto &Def :
|
||||
makeArrayRef(CurMC->DefPrototypes).slice(newDefStart)) {
|
||||
if (SetValue(Def.get(), SubMultiClass.RefRange.Start, SMCTArgs[i],
|
||||
std::vector<unsigned>(),
|
||||
SubMultiClass.TemplateArgs[i]))
|
||||
None, SubMultiClass.TemplateArgs[i]))
|
||||
return true;
|
||||
|
||||
// Resolve it next.
|
||||
|
|
@ -332,8 +331,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
|
|||
|
||||
IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
|
||||
|
||||
if (SetValue(IterRec.get(), Loc, IterVar->getName(),
|
||||
std::vector<unsigned>(), IVal))
|
||||
if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal))
|
||||
return Error(Loc, "when instantiating this def");
|
||||
|
||||
// Resolve it next.
|
||||
|
|
@ -1728,7 +1726,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
|
|||
SMLoc ValLoc = Lex.getLoc();
|
||||
Init *Val = ParseValue(CurRec, Type);
|
||||
if (!Val ||
|
||||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
|
||||
SetValue(CurRec, ValLoc, DeclName, None, Val))
|
||||
// Return the name, even if an error is thrown. This is so that we can
|
||||
// continue to make some progress, even without the value having been
|
||||
// initialized.
|
||||
|
|
@ -2358,8 +2356,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
|
|||
// Set the value for NAME. We don't resolve references to it 'til later,
|
||||
// though, so that uses in nested multiclass names don't get
|
||||
// confused.
|
||||
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME",
|
||||
std::vector<unsigned>(), DefmPrefix)) {
|
||||
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix,
|
||||
/*AllowSelfAssignment*/true)) {
|
||||
Error(DefmPrefixRange.Start, "Could not resolve " +
|
||||
CurRec->getNameInitAsString() + ":NAME to '" +
|
||||
DefmPrefix->getAsUnquotedString() + "'");
|
||||
|
|
@ -2446,8 +2444,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
|
|||
// Check if a value is specified for this temp-arg.
|
||||
if (i < TemplateVals.size()) {
|
||||
// Set it now.
|
||||
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
|
||||
TemplateVals[i]))
|
||||
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], None, TemplateVals[i]))
|
||||
return true;
|
||||
|
||||
// Resolve it next.
|
||||
|
|
|
|||
|
|
@ -105,10 +105,13 @@ public:
|
|||
private: // Semantic analysis methods.
|
||||
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
|
||||
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
|
||||
const std::vector<unsigned> &BitList, Init *V);
|
||||
ArrayRef<unsigned> BitList, Init *V,
|
||||
bool AllowSelfAssignment = false);
|
||||
bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
|
||||
const std::vector<unsigned> &BitList, Init *V) {
|
||||
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
|
||||
ArrayRef<unsigned> BitList, Init *V,
|
||||
bool AllowSelfAssignment = false) {
|
||||
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V,
|
||||
AllowSelfAssignment);
|
||||
}
|
||||
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
|
||||
bool AddSubMultiClass(MultiClass *CurMC,
|
||||
|
|
|
|||
|
|
@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
|
|||
FeaturePerfMon,
|
||||
FeatureZCRegMove, FeatureZCZeroing]>;
|
||||
|
||||
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
|
||||
"Samsung Exynos-M1 processors",
|
||||
[FeatureFPARMv8,
|
||||
FeatureNEON,
|
||||
FeatureCrypto,
|
||||
FeatureCRC,
|
||||
FeaturePerfMon]>;
|
||||
|
||||
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
|
||||
FeatureNEON,
|
||||
FeatureCRC,
|
||||
|
|
@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
|
|||
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
|
||||
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
|
||||
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
|
||||
// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
|
||||
def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Assembly parser
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
|
|||
"AArch64 A57 FP Load-Balancing", false, false)
|
||||
|
||||
namespace {
|
||||
/// A Chain is a sequence of instructions that are linked together by
|
||||
/// A Chain is a sequence of instructions that are linked together by
|
||||
/// an accumulation operand. For example:
|
||||
///
|
||||
/// fmul d0<def>, ?
|
||||
|
|
@ -285,7 +285,7 @@ public:
|
|||
std::string str() const {
|
||||
std::string S;
|
||||
raw_string_ostream OS(S);
|
||||
|
||||
|
||||
OS << "{";
|
||||
StartInst->print(OS, /* SkipOpers= */true);
|
||||
OS << " -> ";
|
||||
|
|
@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
|
|||
return Ch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Bailout case - just return the first item.
|
||||
Chain *Ch = L.front();
|
||||
L.erase(L.begin());
|
||||
|
|
@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
|
|||
RS.enterBasicBlock(&MBB);
|
||||
RS.forward(MachineBasicBlock::iterator(G->getStart()));
|
||||
|
||||
// Can we find an appropriate register that is available throughout the life
|
||||
// Can we find an appropriate register that is available throughout the life
|
||||
// of the chain?
|
||||
unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
|
||||
BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
|
||||
|
|
|
|||
|
|
@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
|
|||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (VA.isRegLoc()) {
|
||||
// Arguments stored in registers.
|
||||
EVT RegVT = VA.getLocVT();
|
||||
|
|
@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
|
|||
|
||||
// The index of an EXT is the first element if it is not UNDEF.
|
||||
// Watch out for the beginning UNDEFs. The EXT index should be the expected
|
||||
// value of the first element. E.g.
|
||||
// value of the first element. E.g.
|
||||
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
|
||||
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
|
||||
// ExpectedElt is the last mask index plus 1.
|
||||
|
|
@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
|
||||
// as well as whether the test should be inverted. This code is required to
|
||||
// catch these cases (as opposed to standard dag combines) because
|
||||
// AArch64ISD::TBZ is matched during legalization.
|
||||
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
|
||||
SelectionDAG &DAG) {
|
||||
|
||||
if (!Op->hasOneUse())
|
||||
return Op;
|
||||
|
||||
// We don't handle undef/constant-fold cases below, as they should have
|
||||
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
|
||||
// etc.)
|
||||
|
||||
// (tbz (trunc x), b) -> (tbz x, b)
|
||||
// This case is just here to enable more of the below cases to be caught.
|
||||
if (Op->getOpcode() == ISD::TRUNCATE &&
|
||||
Bit < Op->getValueType(0).getSizeInBits()) {
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
}
|
||||
|
||||
if (Op->getNumOperands() != 2)
|
||||
return Op;
|
||||
|
||||
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
|
||||
if (!C)
|
||||
return Op;
|
||||
|
||||
switch (Op->getOpcode()) {
|
||||
default:
|
||||
return Op;
|
||||
|
||||
// (tbz (and x, m), b) -> (tbz x, b)
|
||||
case ISD::AND:
|
||||
if ((C->getZExtValue() >> Bit) & 1)
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
return Op;
|
||||
|
||||
// (tbz (shl x, c), b) -> (tbz x, b-c)
|
||||
case ISD::SHL:
|
||||
if (C->getZExtValue() <= Bit &&
|
||||
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
|
||||
Bit = Bit - C->getZExtValue();
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
}
|
||||
return Op;
|
||||
|
||||
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
|
||||
case ISD::SRA:
|
||||
Bit = Bit + C->getZExtValue();
|
||||
if (Bit >= Op->getValueType(0).getSizeInBits())
|
||||
Bit = Op->getValueType(0).getSizeInBits() - 1;
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
|
||||
// (tbz (srl x, c), b) -> (tbz x, b+c)
|
||||
case ISD::SRL:
|
||||
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
|
||||
Bit = Bit + C->getZExtValue();
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
}
|
||||
return Op;
|
||||
|
||||
// (tbz (xor x, -1), b) -> (tbnz x, b)
|
||||
case ISD::XOR:
|
||||
if ((C->getZExtValue() >> Bit) & 1)
|
||||
Invert = !Invert;
|
||||
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize test single bit zero/non-zero and branch.
|
||||
static SDValue performTBZCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
SelectionDAG &DAG) {
|
||||
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
|
||||
bool Invert = false;
|
||||
SDValue TestSrc = N->getOperand(1);
|
||||
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
|
||||
|
||||
if (TestSrc == NewTestSrc)
|
||||
return SDValue();
|
||||
|
||||
unsigned NewOpc = N->getOpcode();
|
||||
if (Invert) {
|
||||
if (NewOpc == AArch64ISD::TBZ)
|
||||
NewOpc = AArch64ISD::TBNZ;
|
||||
else {
|
||||
assert(NewOpc == AArch64ISD::TBNZ);
|
||||
NewOpc = AArch64ISD::TBZ;
|
||||
}
|
||||
}
|
||||
|
||||
SDLoc DL(N);
|
||||
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
|
||||
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
|
||||
}
|
||||
|
||||
// vselect (v1i1 setcc) ->
|
||||
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
|
||||
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
|
||||
|
|
@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return performSTORECombine(N, DCI, DAG, Subtarget);
|
||||
case AArch64ISD::BRCOND:
|
||||
return performBRCONDCombine(N, DCI, DAG);
|
||||
case AArch64ISD::TBNZ:
|
||||
case AArch64ISD::TBZ:
|
||||
return performTBZCombine(N, DCI, DAG);
|
||||
case AArch64ISD::CSEL:
|
||||
return performCONDCombine(N, DCI, DAG, 2, 3);
|
||||
case AArch64ISD::DUP:
|
||||
|
|
|
|||
|
|
@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
|
|||
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
|
||||
}
|
||||
|
||||
// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
|
||||
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
|
||||
MachineInstr *Op1) {
|
||||
assert(MI->memoperands_empty() && "expected a new machineinstr");
|
||||
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
|
||||
(Op1->memoperands_end() - Op1->memoperands_begin());
|
||||
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
|
||||
MachineSDNode::mmo_iterator MemEnd =
|
||||
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
|
||||
MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
|
||||
MI->setMemRefs(MemBegin, MemEnd);
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Paired,
|
||||
|
|
@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
TII->get(NewOpc))
|
||||
.addOperand(getLdStRegOp(RtNewDest))
|
||||
.addOperand(BaseRegOp)
|
||||
.addImm(OffsetImm);
|
||||
|
||||
// Copy MachineMemOperands from the original loads.
|
||||
concatenateMemOperands(NewMemMI, I, Paired);
|
||||
.addImm(OffsetImm)
|
||||
.setMemRefs(I->mergeMemRefsWith(*Paired));
|
||||
|
||||
DEBUG(
|
||||
dbgs()
|
||||
|
|
@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
TII->get(NewOpc))
|
||||
.addOperand(getLdStRegOp(I))
|
||||
.addOperand(BaseRegOp)
|
||||
.addImm(OffsetImm);
|
||||
// Copy MachineMemOperands from the original stores.
|
||||
concatenateMemOperands(MIB, I, Paired);
|
||||
.addImm(OffsetImm)
|
||||
.setMemRefs(I->mergeMemRefsWith(*Paired));
|
||||
} else {
|
||||
// Handle Unscaled
|
||||
if (IsUnscaled)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,14 @@ class Triple;
|
|||
|
||||
class AArch64Subtarget : public AArch64GenSubtargetInfo {
|
||||
protected:
|
||||
enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
|
||||
enum ARMProcFamilyEnum {
|
||||
Others,
|
||||
CortexA35,
|
||||
CortexA53,
|
||||
CortexA57,
|
||||
Cyclone,
|
||||
ExynosM1
|
||||
};
|
||||
|
||||
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
|
||||
ARMProcFamilyEnum ARMProcFamily;
|
||||
|
|
@ -143,6 +150,7 @@ public:
|
|||
bool isCyclone() const { return CPUString == "cyclone"; }
|
||||
bool isCortexA57() const { return CPUString == "cortex-a57"; }
|
||||
bool isCortexA53() const { return CPUString == "cortex-a53"; }
|
||||
bool isExynosM1() const { return CPUString == "exynos-m1"; }
|
||||
|
||||
bool useAA() const override { return isCortexA53(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
|
|||
};
|
||||
|
||||
uint32_t
|
||||
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
|
||||
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
|
||||
const FeatureBitset& FeatureBits, bool &Valid) const {
|
||||
std::string NameLower = Name.lower();
|
||||
|
||||
|
|
@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name,
|
|||
}
|
||||
|
||||
std::string
|
||||
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
|
||||
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
|
||||
const FeatureBitset& FeatureBits) const {
|
||||
// First search the registers shared by all
|
||||
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
|
||||
|
|
|
|||
|
|
@ -285,17 +285,17 @@ struct AArch64NamedImmMapper {
|
|||
// Zero value of FeatureBitSet means the mapping is always available
|
||||
FeatureBitset FeatureBitSet;
|
||||
|
||||
bool isNameEqual(std::string Other,
|
||||
bool isNameEqual(std::string Other,
|
||||
const FeatureBitset& FeatureBits) const {
|
||||
if (FeatureBitSet.any() &&
|
||||
if (FeatureBitSet.any() &&
|
||||
(FeatureBitSet & FeatureBits).none())
|
||||
return false;
|
||||
return Name == Other;
|
||||
}
|
||||
|
||||
bool isValueEqual(uint32_t Other,
|
||||
bool isValueEqual(uint32_t Other,
|
||||
const FeatureBitset& FeatureBits) const {
|
||||
if (FeatureBitSet.any() &&
|
||||
if (FeatureBitSet.any() &&
|
||||
(FeatureBitSet & FeatureBits).none())
|
||||
return false;
|
||||
return Value == Other;
|
||||
|
|
@ -310,7 +310,7 @@ struct AArch64NamedImmMapper {
|
|||
StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
|
||||
bool &Valid) const;
|
||||
// Maps string to value, depending on availability for FeatureBits given
|
||||
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
|
||||
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
|
||||
bool &Valid) const;
|
||||
|
||||
/// Many of the instructions allow an alternative assembly form consisting of
|
||||
|
|
@ -1322,7 +1322,7 @@ namespace AArch64TLBI {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace AArch64II {
|
||||
/// Target Operand Flag enum.
|
||||
|
|
|
|||
|
|
@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
|
|||
"true",
|
||||
"Support flat address space">;
|
||||
|
||||
def FeatureXNACK : SubtargetFeature<"xnack",
|
||||
"EnableXNACK",
|
||||
"true",
|
||||
"Enable XNACK support">;
|
||||
|
||||
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||
"EnableVGPRSpilling",
|
||||
"true",
|
||||
|
|
|
|||
|
|
@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||
}
|
||||
}
|
||||
|
||||
if (VCCUsed || FlatUsed)
|
||||
if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
|
||||
MaxSGPR += 2;
|
||||
|
||||
if (FlatUsed) {
|
||||
MaxSGPR += 2;
|
||||
// 2 additional for VI+.
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
if (FlatUsed)
|
||||
MaxSGPR += 2;
|
||||
|
||||
if (STM.isXNACKEnabled())
|
||||
MaxSGPR += 2;
|
||||
}
|
||||
|
||||
|
|
@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
|||
if (MFI->hasDispatchPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
|
||||
if (STM.isXNACKEnabled())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
|
||||
|
||||
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
|
||||
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
|
|
|
|||
|
|
@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
|||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
|
|
@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
|
|||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
|
||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
|
|
@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
|
|||
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei8 node:$val, node:$ptr), [{
|
||||
return isFlatStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstorei16 node:$val, node:$ptr), [{
|
||||
return isFlatStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def local_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||
|
|
@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
|
|||
|
||||
defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
|
||||
|
||||
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def flat_store : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return isFlatStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
|
||||
(AMDGPUstore_mskor node:$val, node:$ptr), [{
|
||||
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
|
||||
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
|
||||
EnableXNACK(false),
|
||||
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ private:
|
|||
bool EnableIfCvt;
|
||||
bool EnableLoadStoreOpt;
|
||||
bool EnableUnsafeDSOffsetFolding;
|
||||
bool EnableXNACK;
|
||||
unsigned WavefrontSize;
|
||||
bool CFALUBug;
|
||||
int LocalMemorySize;
|
||||
|
|
@ -290,6 +291,10 @@ public:
|
|||
}
|
||||
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
|
||||
|
||||
/// Returns the current value of the EnableXNACK subtarget flag.
bool isXNACKEnabled() const {
|
||||
return EnableXNACK;
|
||||
}
|
||||
|
||||
unsigned getMaxWavesPerCU() const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return 10;
|
||||
|
|
|
|||
|
|
@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
|
|||
|
||||
} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flat Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasFlatAddressSpace] in {
|
||||
|
||||
class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
|
||||
PatFrag flat_ld> :
|
||||
Pat <(vt (flat_ld i64:$ptr)),
|
||||
(Instr_ADDR64 $ptr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
|
||||
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
|
||||
|
||||
class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
|
||||
Pat <(st vt:$value, i64:$ptr),
|
||||
(Instr $value, $ptr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
|
||||
def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
|
||||
def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
|
||||
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
|
||||
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
|
||||
def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
|
||||
|
||||
} // End HasFlatAddressSpace predicate
|
||||
|
||||
let Predicates = [isCI] in {
|
||||
|
||||
// Convert (x - floor(x)) to fract(x)
|
||||
|
|
@ -320,20 +284,10 @@ def : Pat <
|
|||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Patterns to generate flat for global
|
||||
// Flat Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def useFlatForGlobal : Predicate <
|
||||
"Subtarget->useFlatForGlobal() || "
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
|
||||
|
||||
let Predicates = [useFlatForGlobal] in {
|
||||
|
||||
// 1. Offset as 20bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
|
||||
>;
|
||||
let Predicates = [isCIVI] in {
|
||||
|
||||
// Patterns for global loads with no offset
|
||||
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
|
|
@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
|||
(inst $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
|
||||
|
||||
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(node vt:$data, i64:$addr),
|
||||
(inst $data, $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
|
||||
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
|
||||
|
||||
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr, vt:$data)),
|
||||
|
|
@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
|
|||
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
|
||||
|
||||
} // End Predicates = [useFlatForGlobal]
|
||||
} // End Predicates = [isCIVI]
|
||||
|
|
|
|||
|
|
@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
MBB.addLiveIn(PreloadedPrivateBufferReg);
|
||||
}
|
||||
|
||||
// We reserved the last registers for this. Shift it down to the end of those
|
||||
// which were actually used.
|
||||
//
|
||||
// FIXME: It might be safer to use a pseudoregister before replacement.
|
||||
if (!ST.hasSGPRInitBug()) {
|
||||
// We reserved the last registers for this. Shift it down to the end of those
|
||||
// which were actually used.
|
||||
//
|
||||
// FIXME: It might be safer to use a pseudoregister before replacement.
|
||||
|
||||
// FIXME: We should be able to eliminate unused input registers. We only
|
||||
// cannot do this for the resources required for scratch access. For now we
|
||||
// skip over user SGPRs and may leave unused holes.
|
||||
// FIXME: We should be able to eliminate unused input registers. We only
|
||||
// cannot do this for the resources required for scratch access. For now we
|
||||
// skip over user SGPRs and may leave unused holes.
|
||||
|
||||
// We find the resource first because it has an alignment requirement.
|
||||
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
// We find the resource first because it has an alignment requirement.
|
||||
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated one. Make sure we don't clobber the other
|
||||
// reserved input we needed.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg));
|
||||
MRI.replaceRegWith(ScratchRsrcReg, Reg);
|
||||
ScratchRsrcReg = Reg;
|
||||
MFI->setScratchRSrcReg(ScratchRsrcReg);
|
||||
break;
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated one. Make sure we don't clobber the other
|
||||
// reserved input we needed.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg));
|
||||
MRI.replaceRegWith(ScratchRsrcReg, Reg);
|
||||
ScratchRsrcReg = Reg;
|
||||
MFI->setScratchRSrcReg(ScratchRsrcReg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
|
||||
// scratch descriptor, since we haven’t added its uses yet.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg) &&
|
||||
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
|
||||
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
// Skip the last 2 elements because the last one is reserved for VCC, and
|
||||
// this is the 2nd to last element already.
|
||||
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
|
||||
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
|
||||
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
|
||||
// scratch descriptor, since we haven’t added its uses yet.
|
||||
if (!MRI.isPhysRegUsed(Reg)) {
|
||||
assert(MRI.isAllocatable(Reg) &&
|
||||
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
|
||||
|
||||
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
|
||||
ScratchWaveOffsetReg = Reg;
|
||||
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
|
||||
break;
|
||||
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
|
||||
ScratchWaveOffsetReg = Reg;
|
||||
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode<
|
|||
SDTCisVT<0, i64>]>
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PatFrags for FLAT instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Wraps the load operator `ld` in a PatFrag over a single $ptr operand.
// The predicate accepts node N when any of isFlatLoad(N), isGlobalLoad(N) or
// isConstantLoad(N, -1) holds.
// NOTE(review): the first two checks use dyn_cast while the last uses cast;
// presumably N is always a LoadSDNode here - confirm.
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
|
||||
(ld node:$ptr), [{
|
||||
return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
|
||||
isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
|
||||
isConstantLoad(cast<LoadSDNode>(N), -1);
|
||||
}]>;
|
||||
|
||||
def flat_load : flat_ld <load>;
|
||||
def flat_az_extloadi8 : flat_ld <az_extloadi8>;
|
||||
def flat_sextloadi8 : flat_ld <sextloadi8>;
|
||||
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
|
||||
def flat_sextloadi16 : flat_ld <sextloadi16>;
|
||||
|
||||
// Wraps the store operator `st` in a PatFrag over ($val, $ptr) operands.
// The predicate accepts node N when either isFlatStore(N) or isGlobalStore(N)
// holds.
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
|
||||
(st node:$val, node:$ptr), [{
|
||||
return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
|
||||
isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def flat_store: flat_st <store>;
|
||||
def flat_truncstorei8 : flat_st <truncstorei8>;
|
||||
def flat_truncstorei16 : flat_st <truncstorei16>;
|
||||
|
||||
|
||||
def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
|
||||
return isGlobalLoad(cast<LoadSDNode>(N)) ||
|
||||
isConstantLoad(cast<LoadSDNode>(N), -1);
|
||||
|
|
|
|||
|
|
@ -59,8 +59,6 @@ defm EXP : EXP_m;
|
|||
// SMRD Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let mayLoad = 1 in {
|
||||
|
||||
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
|
||||
// SMRD instructions, because the SGPR_32 register class does not include M0
|
||||
// and writing to M0 from an SMRD instruction will hang the GPU.
|
||||
|
|
@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
|
|||
smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
|
||||
>;
|
||||
|
||||
} // mayLoad = 1
|
||||
|
||||
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
|
||||
|
||||
defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
|
||||
|
|
|
|||
|
|
@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
|
|||
|
||||
if (!LaneVGPRs.count(LaneVGPRIdx)) {
|
||||
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
|
||||
|
||||
if (LaneVGPR == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF->getFunction()->getContext();
|
||||
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
|
||||
|
||||
// When compiling from inside Mesa, the compilation continues.
|
||||
// Select an arbitrary register to avoid triggering assertions
|
||||
// during subsequent passes.
|
||||
LaneVGPR = AMDGPU::VGPR0;
|
||||
}
|
||||
|
||||
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
|
||||
|
||||
// Add this register as live-in to all blocks to avoid machine verifier
|
||||
|
|
|
|||
|
|
@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
|
|||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.hasSGPRInitBug()) {
|
||||
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
|
||||
if (ST.isXNACKEnabled())
|
||||
BaseIdx -= 4;
|
||||
|
||||
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
|
||||
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
||||
}
|
||||
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
// 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
|
||||
// next sgpr128 down.
|
||||
// 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
|
||||
// 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
|
||||
// either way.
|
||||
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
|
||||
}
|
||||
|
||||
|
|
@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
|
|||
const MachineFunction &MF) const {
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.hasSGPRInitBug()) {
|
||||
unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
|
||||
unsigned Idx;
|
||||
|
||||
if (!ST.isXNACKEnabled())
|
||||
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
|
||||
else
|
||||
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
|
||||
|
||||
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
|
||||
}
|
||||
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
// Next register before reservations for flat_scr and vcc.
|
||||
return AMDGPU::SGPR97;
|
||||
if (!ST.isXNACKEnabled()) {
|
||||
// Next register before reservations for flat_scr and vcc.
|
||||
return AMDGPU::SGPR97;
|
||||
} else {
|
||||
// Next register before reservations for flat_scr, xnack_mask, vcc,
|
||||
// and scratch resource.
|
||||
return AMDGPU::SGPR91;
|
||||
}
|
||||
}
|
||||
|
||||
return AMDGPU::SGPR95;
|
||||
|
|
@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||
// for VCC/FLAT_SCR.
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
|
||||
|
||||
if (ST.isXNACKEnabled())
|
||||
reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
|
||||
}
|
||||
|
||||
// Tonga and Iceland can only allocate a fixed number of SGPRs due
|
||||
|
|
@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||
if (ST.hasSGPRInitBug()) {
|
||||
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
|
||||
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
|
||||
// Assume XNACK_MASK is unused.
|
||||
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
|
||||
|
||||
if (ST.isXNACKEnabled())
|
||||
Limit -= 2;
|
||||
|
||||
for (unsigned i = Limit; i < NumSGPRs; ++i) {
|
||||
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
|
||||
reserveRegisterTuples(Reserved, Reg);
|
||||
|
|
@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
struct SIMachineFunctionInfo::SpilledReg Spill =
|
||||
MFI->getSpilledReg(MF, Index, i);
|
||||
|
||||
if (Spill.VGPR == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF->getFunction()->getContext();
|
||||
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL,
|
||||
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
|
||||
Spill.VGPR)
|
||||
|
|
@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
struct SIMachineFunctionInfo::SpilledReg Spill =
|
||||
MFI->getSpilledReg(MF, Index, i);
|
||||
|
||||
if (Spill.VGPR == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF->getFunction()->getContext();
|
||||
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL,
|
||||
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
|
||||
SubReg)
|
||||
|
|
|
|||
|
|
@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
|
|||
|
||||
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
||||
// 1. Offset as 20bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
|
||||
>;
|
||||
|
||||
} // End Predicates = [isVI]
|
||||
|
|
|
|||
|
|
@ -252,6 +252,8 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
|
|||
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
|
||||
"Swift ARM processors", []>;
|
||||
|
||||
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
|
||||
"Samsung Exynos-M1 processors", []>;
|
||||
|
||||
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
|
||||
"Cortex-R4 ARM processors", []>;
|
||||
|
|
@ -649,6 +651,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
|
|||
FeatureCrypto,
|
||||
FeatureZCZeroing]>;
|
||||
|
||||
// Processor entry "exynos-m1", declared through ProcNoItin with the feature
// list ARMv8a, ProcExynosM1, FeatureHWDiv, FeatureHWDivARM, FeatureT2XtPk,
// FeatureCrypto and FeatureCRC.
// NOTE(review): ProcNoItin is defined outside this hunk; presumably it
// declares a processor without an instruction itinerary - confirm.
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
|
||||
FeatureHWDiv,
|
||||
FeatureHWDivARM,
|
||||
FeatureT2XtPk,
|
||||
FeatureCrypto,
|
||||
FeatureCRC]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
|
|
|
|||
|
|
@ -340,12 +340,12 @@ namespace {
|
|||
/// verify - check BBOffsets, BBSizes, alignment of islands
|
||||
void ARMConstantIslands::verify() {
|
||||
#ifndef NDEBUG
|
||||
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
|
||||
MBBI != E; ++MBBI) {
|
||||
MachineBasicBlock *MBB = &*MBBI;
|
||||
unsigned MBBId = MBB->getNumber();
|
||||
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
|
||||
}
|
||||
assert(std::is_sorted(MF->begin(), MF->end(),
|
||||
[this](const MachineBasicBlock &LHS,
|
||||
const MachineBasicBlock &RHS) {
|
||||
return BBInfo[LHS.getNumber()].postOffset() <
|
||||
BBInfo[RHS.getNumber()].postOffset();
|
||||
}));
|
||||
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
|
||||
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
|
||||
CPUser &U = CPUsers[i];
|
||||
|
|
|
|||
|
|
@ -1986,23 +1986,6 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
|
|||
return AddedRegPressure.size() <= MemRegs.size() * 2;
|
||||
}
|
||||
|
||||
|
||||
/// Give \p MI the memory operands of \p Op0 followed by those of \p Op1.
///
/// A new array sized for both operand lists is obtained from the
/// MachineFunction that contains \p MI, the two lists are copied into it in
/// order, and the result is installed on \p MI with setMemRefs().
///
/// \param MI   Instruction receiving the combined list; it must not have any
///             memory operands yet (asserted).
/// \param Op0  Instruction whose memory operands are copied first.
/// \param Op1  Instruction whose memory operands are copied second.
|
||||
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
|
||||
MachineInstr *Op1) {
|
||||
// Precondition: MI carries no memory operands of its own.
assert(MI->memoperands_empty() && "expected a new machineinstr");
|
||||
// Total number of entries needed: Op0's count plus Op1's count.
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
|
||||
+ (Op1->memoperands_end() - Op1->memoperands_begin());
|
||||
|
||||
// The array is allocated by the function that owns MI's parent block.
MachineFunction *MF = MI->getParent()->getParent();
|
||||
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
|
||||
// Copy Op0's operands to the front; MemEnd is where the copy stopped.
MachineSDNode::mmo_iterator MemEnd =
|
||||
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
|
||||
// Append Op1's operands right after Op0's and advance MemEnd past them.
MemEnd =
|
||||
std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
|
||||
// Install the combined [MemBegin, MemEnd) range on MI.
MI->setMemRefs(MemBegin, MemEnd);
|
||||
}
|
||||
|
||||
bool
|
||||
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
|
||||
DebugLoc &dl, unsigned &NewOpc,
|
||||
|
|
@ -2196,7 +2179,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
|||
if (!isT2)
|
||||
MIB.addReg(0);
|
||||
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
|
||||
concatenateMemOperands(MIB, Op0, Op1);
|
||||
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
|
||||
DEBUG(dbgs() << "Formed " << *MIB << "\n");
|
||||
++NumLDRDFormed;
|
||||
} else {
|
||||
|
|
@ -2210,7 +2193,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
|||
if (!isT2)
|
||||
MIB.addReg(0);
|
||||
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
|
||||
concatenateMemOperands(MIB, Op0, Op1);
|
||||
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
|
||||
DEBUG(dbgs() << "Formed " << *MIB << "\n");
|
||||
++NumSTRDFormed;
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue